1 //
   2 // Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2020, 2021, Arm Limited. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 dnl Generate the warning
  27 // This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
  28 dnl
  29 
  30 // AArch64 SVE Architecture Description File
  31 
  32 dnl
  33 define(`TYPE2DATATYPE',
  34 `ifelse($1, `B', `BYTE',
  35         $1, `S', `SHORT',
  36         $1, `I', `INT',
  37         $1, `L', `LONG',
  38         $1, `F', `FLOAT',
  39         $1, `D', `DOUBLE',
  40         `error($1)')')dnl
  41 dnl
dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1,            $2,       $3,      $4   )
dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len, scale)
  44 define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
  45 operand vmemA_imm$1Offset$3()
  46 %{
  47   // (esize / msize) = $4
  48   predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
  49             Matcher::scalable_vector_reg_size(T_BYTE)ifelse($4, `1', `', ` / $4')));
  50   match(Con$1);
  51 
  52   op_cost(0);
  53   format %{ %}
  54   interface(CONST_INTER);
  55 %}')dnl
  56 
// 4-bit signed offset -- for predicated load/store
  58 OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int,  4, 1)
  59 OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4, 1)
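dnl For example, OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4, 1) expands to an operand
dnl "vmemA_immIOffset4" matching a ConI whose value satisfies
dnl Address::offset_ok_for_sve_immed(n->get_int(), 4, Matcher::scalable_vector_reg_size(T_BYTE)).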
  60 dnl
  61 dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1,            $2     )
  62 dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
  63 define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
  64 operand vmemA_indOff$1$2$3(iRegP reg, vmemA_imm$1Offset$2 off)
  65 %{
  66   constraint(ALLOC_IN_RC(ptr_reg));
  67   match(AddP reg off);
  68   op_cost(0);
  69   format %{ "[$reg, $off]" %}
  70   interface(MEMORY_INTER) %{
  71     base($reg);
  72     `index'(0xffffffff);
  73     scale(0x0);
  74     disp($off);
  75   %}
  76 %}')dnl
  77 OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
  78 OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
  79 
// The indOff of vmemA is valid only when the vector element (load to/store from)
// size equals the memory element (load from/store to) size.
  82 opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
  83 
  84 source_hpp %{
  85   bool op_sve_supported(int opcode, int vlen, BasicType bt);
  86 %}
  87 
  88 source %{
  89 
  90   typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
  91                                                              PRegister Pg, const Address &adr);
  92 
  // Predicated load/store; the given predicate register may be ptrue, which enables all elements.
  94   static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg,
  95                                     PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt,
  96                                     int opcode, Register base, int index, int size, int disp) {
  97     sve_mem_insn_predicate insn;
  98     int mesize = type2aelembytes(mem_elem_bt);
  99     if (index == -1) {
 100       assert(size == 0, "unsupported address mode: scale size = %d", size);
 101       switch(mesize) {
 102       case 1:
 103         insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
 104         break;
 105       case 2:
 106         insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
 107         break;
 108       case 4:
 109         insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
 110         break;
 111       case 8:
 112         insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
 113         break;
 114       default:
 115         assert(false, "unsupported");
 116         ShouldNotReachHere();
 117       }
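      // disp is a byte offset; the ld1/st1 immediate counts whole vectors' worth
      // of memory (a "MUL VL" offset), hence the division by the memory element
      // size and the per-vector element count.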
 118       int imm4 = disp / mesize / Matcher::scalable_vector_reg_size(vector_elem_bt);
 119       (masm.*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4));
 120     } else {
 121       assert(false, "unimplemented");
 122       ShouldNotReachHere();
 123     }
 124   }
 125 
 126   bool op_sve_supported(int opcode, int vlen, BasicType bt) {
 127     int length_in_bytes = vlen * type2aelembytes(bt);
 128     switch (opcode) {
 129       case Op_MulAddVS2VI:
 130       // No multiply reduction instructions
 131       case Op_MulReductionVD:
 132       case Op_MulReductionVF:
 133       case Op_MulReductionVI:
 134       case Op_MulReductionVL:
 135       // Others
 136       case Op_ExtractC:
 137       case Op_ExtractUB:
 138         return false;
 139       // Vector API specific
 140       case Op_VectorLoadShuffle:
 141       case Op_VectorRearrange:
 142         if (vlen < 4 || length_in_bytes > MaxVectorSize) {
 143           return false;
 144         } else {
 145           return true;
 146         }
 147       case Op_LoadVector:
 148       case Op_StoreVector:
 149         return Matcher::vector_size_supported(bt, vlen);
 150       default:
 151         break;
 152     }
    // By default, we only support vector operations with at least 8 bytes and 2 elements.
 154     return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2;
 155   }
 156 %}
 157 
 158 definitions %{
 159   int_def SVE_COST             (200, 200);
 160 %}
 161 
 162 dnl
dnl ELEMENT_SHORT_CHAR($1,    $2  )
dnl ELEMENT_SHORT_CHAR(etype, node)
 165 define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
 166   `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
 167             ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
 168    `($2->bottom_type()->is_vect()->element_basic_type() == $1)')')dnl
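dnl T_CHAR is accepted alongside T_SHORT because both are 16-bit element types.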
 169 dnl
 170 
 171 // All SVE instructions
 172 
 173 // vector load/store
 174 
 175 // Unpredicated vector load/store
 176 instruct loadV(vReg dst, vmemA mem) %{
 177   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16 &&
 178             n->as_LoadVector()->memory_size() == MaxVectorSize);
 179   match(Set dst (LoadVector mem));
 180   ins_cost(4 * SVE_COST);
 181   format %{ "sve_ldr $dst, $mem\t# vector (sve)" %}
 182   ins_encode %{
 183     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 184     BasicType bt = Matcher::vector_element_basic_type(this);
 185     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
 186                           bt, bt, $mem->opcode(),
 187                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 188   %}
 189   ins_pipe(pipe_slow);
 190 %}
 191 
 192 instruct storeV(vReg src, vmemA mem) %{
 193   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16 &&
 194             n->as_StoreVector()->memory_size() == MaxVectorSize);
 195   match(Set mem (StoreVector mem src));
 196   ins_cost(4 * SVE_COST);
 197   format %{ "sve_str $mem, $src\t# vector (sve)" %}
 198   ins_encode %{
 199     FloatRegister src_reg = as_FloatRegister($src$$reg);
 200     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 201     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
 202                           bt, bt, $mem->opcode(),
 203                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 204   %}
 205   ins_pipe(pipe_slow);
 206 %}dnl
 207 
 208 dnl
 209 define(`VLoadStore', `
 210 // ifelse(load, $3, Load, Store) Vector ($6 bits)
 211 instruct $3V$4_vreg`'(vReg $7, vmem$4 mem)
 212 %{
 213   predicate(UseSVE > 0 && `n->as_'ifelse(load, $3, Load, Store)Vector()->memory_size() == $4);
 214   match(Set ifelse(load, $3, dst (LoadVector mem), mem (StoreVector mem src)));
 215   ins_cost(4 * INSN_COST);
 216   format %{ "$1   ifelse(load, $3, `$dst,$mem', `$mem,$src')\t# vector ($6 bits)" %}
 217   ins_encode( `aarch64_enc_'ifelse(load, $3, ldr, str)v$2($7, mem) );
 218   ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64));
 219 %}')dnl
 220 dnl        $1    $2 $3     $4  $5 $6   $7
 221 VLoadStore(ldrh, H, load,  2,  D, 16,  dst)
 222 VLoadStore(strh, H, store, 2,  D, 16,  src)
 223 VLoadStore(ldrs, S, load,  4,  D, 32,  dst)
 224 VLoadStore(strs, S, store, 4,  D, 32,  src)
 225 VLoadStore(ldrd, D, load,  8,  D, 64,  dst)
 226 VLoadStore(strd, D, store, 8,  D, 64,  src)
 227 VLoadStore(ldrq, Q, load, 16,  X, 128, dst)
 228 VLoadStore(strq, Q, store, 16, X, 128, src)
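dnl The instructs generated above cover vectors of 2 to 16 bytes with plain AdvSIMD
dnl (NEON) ldr/str encodings; full-size SVE vectors are handled by loadV/storeV, and
dnl the remaining partial sizes by the predicated rules below.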
 229 
// Predicated vector load/store, based on the vector length of the node.
// Only load/store values within the range of memory_size. This is needed
// when memory_size is smaller than the hardware-supported max vector size,
// which can happen for Vector API mask vector load/store.
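// sve_whilelo_zr_imm sets up a governing predicate with only the first
// vector_length() lanes active, so only memory_size bytes are accessed.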
 234 instruct loadV_partial(vReg dst, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{
 235   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 &&
 236             n->as_LoadVector()->memory_size() < MaxVectorSize);
 237   match(Set dst (LoadVector mem));
 238   effect(TEMP pTmp, KILL cr);
 239   ins_cost(6 * SVE_COST);
 240   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
 241             "sve_ldr $dst, $pTmp, $mem\t# load vector predicated" %}
 242   ins_encode %{
 243     BasicType bt = Matcher::vector_element_basic_type(this);
 244     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt),
 245                           Matcher::vector_length(this));
 246     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 247     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg,
 248                           as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(),
 249                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 250   %}
 251   ins_pipe(pipe_slow);
 252 %}
 253 
 254 instruct storeV_partial(vReg src, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{
 255   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 &&
 256             n->as_StoreVector()->memory_size() < MaxVectorSize);
 257   match(Set mem (StoreVector mem src));
 258   effect(TEMP pTmp, KILL cr);
 259   ins_cost(5 * SVE_COST);
 260   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
 261             "sve_str $src, $pTmp, $mem\t# store vector predicated" %}
 262   ins_encode %{
 263     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 264     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt),
 265                           Matcher::vector_length(this, $src));
 266     FloatRegister src_reg = as_FloatRegister($src$$reg);
 267     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg,
 268                           as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(),
 269                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 270   %}
 271   ins_pipe(pipe_slow);
 272 %}dnl
 273 
 274 
 275 // vector reinterpret
 276 
 277 instruct reinterpret(vReg dst) %{
 278   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() ==
 279                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src == dst
 280   match(Set dst (VectorReinterpret dst));
 281   ins_cost(0);
 282   format %{ "# reinterpret $dst\t# do nothing" %}
 283   ins_encode %{
 284     // empty
 285   %}
 286   ins_pipe(pipe_class_empty);
 287 %}
 288 
 289 instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{
 290   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() !=
 291                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src != dst
 292   match(Set dst (VectorReinterpret src));
 293   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
 294   ins_cost(3 * SVE_COST);
 295   format %{ "reinterpretResize $dst, $src\t# vector (sve)" %}
 296   ins_encode %{
 297     uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
 298     uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
 299     uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ?
 300                                   length_in_bytes_src : length_in_bytes_dst;
 301     assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
 302            "invalid vector length");
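    // Keep the low min(src, dst) bytes from src and zero the rest of dst:
    // the copy is predicated with whilelo over the smaller of the two sizes.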
 303     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ B, length_in_bytes_resize);
 304     __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0);
 305     __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pTmp$$reg),
 306                as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg));
 307   %}
 308   ins_pipe(pipe_slow);
 309 %}
 310 dnl
dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1,        $2,      $3,           $4,   $5,          $6  )
 312 dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
 313 define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
 314 instruct $1(vReg dst, vReg src) %{
 315   predicate(UseSVE > 0 &&
 316             n->bottom_type()->is_vect()->element_basic_type() == $3);
 317   match(Set dst ($2 src));
 318   ins_cost(SVE_COST);
 319   format %{ "$6 $dst, $src\t# vector (sve) ($4)" %}
 320   ins_encode %{
 321     __ $6(as_FloatRegister($dst$$reg), __ $4,
 322          ptrue, as_FloatRegister($src$$reg));
 323   %}
 324   ins_pipe(pipe_slow);
 325 %}')dnl
 326 dnl
 327 
 328 // vector abs
 329 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsB, AbsVB, T_BYTE,   B, 16, sve_abs)
 330 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsS, AbsVS, T_SHORT,  H, 8,  sve_abs)
 331 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsI, AbsVI, T_INT,    S, 4,  sve_abs)
 332 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsL, AbsVL, T_LONG,   D, 2,  sve_abs)
 333 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsF, AbsVF, T_FLOAT,  S, 4,  sve_fabs)
 334 UNARY_OP_TRUE_PREDICATE_ETYPE(vabsD, AbsVD, T_DOUBLE, D, 2,  sve_fabs)
 335 dnl
dnl BINARY_OP_UNPREDICATED($1,        $2,      $3,   $4,          $5  )
 337 dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
 338 define(`BINARY_OP_UNPREDICATED', `
 339 instruct $1(vReg dst, vReg src1, vReg src2) %{
 340   predicate(UseSVE > 0);
 341   match(Set dst ($2 src1 src2));
 342   ins_cost(SVE_COST);
 343   format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
 344   ins_encode %{
 345     __ $5(as_FloatRegister($dst$$reg), __ $3,
 346          as_FloatRegister($src1$$reg),
 347          as_FloatRegister($src2$$reg));
 348   %}
 349   ins_pipe(pipe_slow);
 350 %}')dnl
 351 
 352 // vector add
 353 BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add)
 354 BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8,  sve_add)
 355 BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4,  sve_add)
 356 BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2,  sve_add)
 357 BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4,  sve_fadd)
 358 BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2,  sve_fadd)
 359 dnl
 360 dnl BINARY_OP_UNSIZED($1,        $2,      $3,          $4  )
 361 dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
 362 define(`BINARY_OP_UNSIZED', `
 363 instruct $1(vReg dst, vReg src1, vReg src2) %{
 364   predicate(UseSVE > 0);
 365   match(Set dst ($2 src1 src2));
 366   ins_cost(SVE_COST);
 367   format %{ "$4  $dst, $src1, $src2\t# vector (sve)" %}
 368   ins_encode %{
 369     __ $4(as_FloatRegister($dst$$reg),
 370          as_FloatRegister($src1$$reg),
 371          as_FloatRegister($src2$$reg));
 372   %}
 373   ins_pipe(pipe_slow);
 374 %}')dnl
 375 
 376 // vector and
 377 BINARY_OP_UNSIZED(vand, AndV, 16, sve_and)
 378 
 379 // vector or
 380 BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)
 381 
 382 // vector xor
 383 BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)
 384 
 385 // vector not
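// Not is matched as (XorV src (Replicate -1)). Since the operation is bitwise,
// a single sve_not encoding using the D size covers all element types.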
 386 dnl
 387 define(`MATCH_RULE', `ifelse($1, I,
 388 `match(Set dst (XorV src (ReplicateB m1)));
 389   match(Set dst (XorV src (ReplicateS m1)));
 390   match(Set dst (XorV src (ReplicateI m1)));',
 391 `match(Set dst (XorV src (ReplicateL m1)));')')dnl
 392 dnl
 393 define(`VECTOR_NOT', `
 394 instruct vnot$1`'(vReg dst, vReg src, imm$1_M1 m1) %{
 395   predicate(UseSVE > 0);
 396   MATCH_RULE($1)
 397   ins_cost(SVE_COST);
 398   format %{ "sve_not $dst, $src\t# vector (sve) $2" %}
 399   ins_encode %{
 400     __ sve_not(as_FloatRegister($dst$$reg), __ D,
 401                ptrue, as_FloatRegister($src$$reg));
 402   %}
 403   ins_pipe(pipe_slow);
 404 %}')dnl
 405 dnl        $1,$2
 406 VECTOR_NOT(I, B/H/S)
 407 VECTOR_NOT(L, D)
 408 undefine(MATCH_RULE)
 409 
 410 // vector and_not
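// AndNot is matched as (AndV src1 (XorV src2 (Replicate -1))) and is implemented
// with the unpredicated sve_bic (bitwise clear) instruction.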
 411 dnl
 412 define(`MATCH_RULE', `ifelse($1, I,
 413 `match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
 414   match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
 415   match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
 416 `match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
 417 dnl
 418 define(`VECTOR_AND_NOT', `
 419 instruct vand_not$1`'(vReg dst, vReg src1, vReg src2, imm$1_M1 m1) %{
 420   predicate(UseSVE > 0);
 421   MATCH_RULE($1)
 422   ins_cost(SVE_COST);
 423   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) $2" %}
 424   ins_encode %{
 425     __ sve_bic(as_FloatRegister($dst$$reg),
 426                as_FloatRegister($src1$$reg),
 427                as_FloatRegister($src2$$reg));
 428   %}
 429   ins_pipe(pipe_slow);
 430 %}')dnl
 431 dnl            $1,$2
 432 VECTOR_AND_NOT(I, B/H/S)
 433 VECTOR_AND_NOT(L, D)
 434 undefine(MATCH_RULE)
 435 dnl
dnl VDIVF($1,          $2,   $3         )
 437 dnl VDIVF(name_suffix, size, min_vec_len)
 438 define(`VDIVF', `
 439 instruct vdiv$1(vReg dst_src1, vReg src2) %{
 440   predicate(UseSVE > 0);
 441   match(Set dst_src1 (DivV$1 dst_src1 src2));
 442   ins_cost(SVE_COST);
 443   format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
 444   ins_encode %{
 445     __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
 446          ptrue, as_FloatRegister($src2$$reg));
 447   %}
 448   ins_pipe(pipe_slow);
 449 %}')dnl
 450 
 451 // vector float div
 452 VDIVF(F, S, 4)
 453 VDIVF(D, D, 2)
 454 
 455 // vector min/max
 456 
 457 instruct vmin(vReg dst_src1, vReg src2) %{
 458   predicate(UseSVE > 0);
 459   match(Set dst_src1 (MinV dst_src1 src2));
 460   ins_cost(SVE_COST);
 461   format %{ "sve_min $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
 462   ins_encode %{
 463     BasicType bt = Matcher::vector_element_basic_type(this);
 464     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 465     if (is_floating_point_type(bt)) {
 466       __ sve_fmin(as_FloatRegister($dst_src1$$reg), size,
 467                   ptrue, as_FloatRegister($src2$$reg));
 468     } else {
 469       assert(is_integral_type(bt), "Unsupported type");
 470       __ sve_smin(as_FloatRegister($dst_src1$$reg), size,
 471                   ptrue, as_FloatRegister($src2$$reg));
 472     }
 473   %}
 474   ins_pipe(pipe_slow);
 475 %}
 476 
 477 instruct vmax(vReg dst_src1, vReg src2) %{
 478   predicate(UseSVE > 0);
 479   match(Set dst_src1 (MaxV dst_src1 src2));
 480   ins_cost(SVE_COST);
 481   format %{ "sve_max $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
 482   ins_encode %{
 483     BasicType bt = Matcher::vector_element_basic_type(this);
 484     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 485     if (is_floating_point_type(bt)) {
 486       __ sve_fmax(as_FloatRegister($dst_src1$$reg), size,
 487                   ptrue, as_FloatRegister($src2$$reg));
 488     } else {
 489       assert(is_integral_type(bt), "Unsupported type");
 490       __ sve_smax(as_FloatRegister($dst_src1$$reg), size,
 491                   ptrue, as_FloatRegister($src2$$reg));
 492     }
 493   %}
 494   ins_pipe(pipe_slow);
 495 %}
 496 
 497 dnl
 498 dnl VFMLA($1           $2    $3         )
 499 dnl VFMLA(name_suffix, size, min_vec_len)
 500 define(`VFMLA', `
 501 // dst_src1 = dst_src1 + src2 * src3
 502 instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
 503   predicate(UseFMA && UseSVE > 0);
 504   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
 505   ins_cost(SVE_COST);
 506   format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 507   ins_encode %{
 508     __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
 509          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 510   %}
 511   ins_pipe(pipe_slow);
 512 %}')dnl
 513 dnl
 514 // vector fmla
 515 VFMLA(F, S, 4)
 516 VFMLA(D, D, 2)
 517 
 518 dnl
 519 dnl VFMLS($1           $2    $3         )
 520 dnl VFMLS(name_suffix, size, min_vec_len)
 521 define(`VFMLS', `
 522 // dst_src1 = dst_src1 + -src2 * src3
 523 // dst_src1 = dst_src1 + src2 * -src3
 524 instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
 525   predicate(UseFMA && UseSVE > 0);
 526   match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
 527   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
 528   ins_cost(SVE_COST);
 529   format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 530   ins_encode %{
 531     __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
 532          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 533   %}
 534   ins_pipe(pipe_slow);
 535 %}')dnl
 536 dnl
 537 // vector fmls
 538 VFMLS(F, S, 4)
 539 VFMLS(D, D, 2)
 540 
 541 dnl
 542 dnl VFNMLA($1           $2    $3         )
 543 dnl VFNMLA(name_suffix, size, min_vec_len)
 544 define(`VFNMLA', `
 545 // dst_src1 = -dst_src1 + -src2 * src3
 546 // dst_src1 = -dst_src1 + src2 * -src3
 547 instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
 548   predicate(UseFMA && UseSVE > 0);
 549   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
 550   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
 551   ins_cost(SVE_COST);
 552   format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 553   ins_encode %{
 554     __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
 555          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 556   %}
 557   ins_pipe(pipe_slow);
 558 %}')dnl
 559 dnl
 560 // vector fnmla
 561 VFNMLA(F, S, 4)
 562 VFNMLA(D, D, 2)
 563 
 564 dnl
 565 dnl VFNMLS($1           $2    $3         )
 566 dnl VFNMLS(name_suffix, size, min_vec_len)
 567 define(`VFNMLS', `
 568 // dst_src1 = -dst_src1 + src2 * src3
 569 instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
 570   predicate(UseFMA && UseSVE > 0);
 571   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
 572   ins_cost(SVE_COST);
 573   format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 574   ins_encode %{
 575     __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
 576          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 577   %}
 578   ins_pipe(pipe_slow);
 579 %}')dnl
 580 dnl
 581 // vector fnmls
 582 VFNMLS(F, S, 4)
 583 VFNMLS(D, D, 2)
 584 
 585 dnl
 586 dnl VMLA($1           $2    $3         )
 587 dnl VMLA(name_suffix, size, min_vec_len)
 588 define(`VMLA', `
 589 // dst_src1 = dst_src1 + src2 * src3
 590 instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
 591 %{
 592   predicate(UseSVE > 0);
 593   match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
 594   ins_cost(SVE_COST);
 595   format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %}
 596   ins_encode %{
 597     __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
 598       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 599   %}
 600   ins_pipe(pipe_slow);
 601 %}')dnl
 602 dnl
 603 // vector mla
 604 VMLA(B, B, 16)
 605 VMLA(S, H, 8)
 606 VMLA(I, S, 4)
 607 VMLA(L, D, 2)
 608 
 609 dnl
 610 dnl VMLS($1           $2    $3         )
 611 dnl VMLS(name_suffix, size, min_vec_len)
 612 define(`VMLS', `
 613 // dst_src1 = dst_src1 - src2 * src3
 614 instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
 615 %{
 616   predicate(UseSVE > 0);
 617   match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
 618   ins_cost(SVE_COST);
 619   format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %}
 620   ins_encode %{
 621     __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
 622       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 623   %}
 624   ins_pipe(pipe_slow);
 625 %}')dnl
 626 dnl
 627 // vector mls
 628 VMLS(B, B, 16)
 629 VMLS(S, H, 8)
 630 VMLS(I, S, 4)
 631 VMLS(L, D, 2)
 632 
 633 dnl
 634 dnl BINARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
 635 dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
 636 define(`BINARY_OP_TRUE_PREDICATE', `
 637 instruct $1(vReg dst_src1, vReg src2) %{
 638   predicate(UseSVE > 0);
 639   match(Set dst_src1 ($2 dst_src1 src2));
 640   ins_cost(SVE_COST);
 641   format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
 642   ins_encode %{
 643     __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
 644          ptrue, as_FloatRegister($src2$$reg));
 645   %}
 646   ins_pipe(pipe_slow);
 647 %}')dnl
 648 
 649 // vector mul
 650 BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul)
 651 BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8,  sve_mul)
 652 BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4,  sve_mul)
 653 BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2,  sve_mul)
 654 BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
 655 BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
 656 
 657 dnl
 658 dnl UNARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4,            $5  )
 659 dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
 660 define(`UNARY_OP_TRUE_PREDICATE', `
 661 instruct $1(vReg dst, vReg src) %{
 662   predicate(UseSVE > 0);
 663   match(Set dst ($2 src));
 664   ins_cost(SVE_COST);
 665   format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
 666   ins_encode %{
 667     __ $5(as_FloatRegister($dst$$reg), __ $3,
 668          ptrue, as_FloatRegister($src$$reg));
 669   %}
 670   ins_pipe(pipe_slow);
 671 %}')dnl
 672 dnl
 673 // vector fneg
 674 UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
 675 UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
 676 
 677 // popcount vector
 678 
 679 instruct vpopcountI(vReg dst, vReg src) %{
 680   predicate(UseSVE > 0);
 681   match(Set dst (PopCountVI src));
  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)" %}
 683   ins_encode %{
 684      __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
 685   %}
 686   ins_pipe(pipe_slow);
 687 %}
 688 
 689 // vector mask compare
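// The comparison result is materialized as a mask vector: lanes where the
// condition holds are set to -1 (all bits set), the remaining lanes to 0.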
 690 
 691 instruct vmaskcmp(vReg dst, vReg src1, vReg src2, immI cond, pRegGov pTmp, rFlagsReg cr) %{
 692   predicate(UseSVE > 0);
 693   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
 694   effect(TEMP pTmp, KILL cr);
 695   ins_cost(2 * SVE_COST);
 696   format %{ "sve_cmp $pTmp, $src1, $src2\n\t"
 697             "sve_cpy $dst, $pTmp, -1\t# vector mask cmp (sve)" %}
 698   ins_encode %{
 699     BasicType bt = Matcher::vector_element_basic_type(this);
 700     __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src1$$reg),
 701                    as_FloatRegister($src2$$reg), (int)$cond$$constant);
 702     __ sve_cpy(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
 703                as_PRegister($pTmp$$reg), -1, false);
 704   %}
 705   ins_pipe(pipe_slow);
 706 %}
 707 
 708 // vector blend
 709 
 710 instruct vblend(vReg dst, vReg src1, vReg src2, vReg src3, pRegGov pTmp, rFlagsReg cr) %{
 711   predicate(UseSVE > 0);
 712   match(Set dst (VectorBlend (Binary src1 src2) src3));
 713   effect(TEMP pTmp, KILL cr);
 714   ins_cost(2 * SVE_COST);
 715   format %{ "sve_cmpeq $pTmp, $src3, -1\n\t"
 716             "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
 717   ins_encode %{
 718     Assembler::SIMD_RegVariant size =
 719       __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
 720     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
 721                ptrue, as_FloatRegister($src3$$reg), -1);
 722     __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg),
 723                as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
 724   %}
 725   ins_pipe(pipe_slow);
 726 %}
 727 
 728 // vector blend with compare
 729 
 730 instruct vblend_maskcmp(vReg dst, vReg src1, vReg src2, vReg src3,
 731                         vReg src4, pRegGov pTmp, immI cond, rFlagsReg cr) %{
 732   predicate(UseSVE > 0);
 733   match(Set dst (VectorBlend (Binary src1 src2) (VectorMaskCmp (Binary src3 src4) cond)));
 734   effect(TEMP pTmp, KILL cr);
 735   ins_cost(2 * SVE_COST);
 736   format %{ "sve_cmp $pTmp, $src3, $src4\t# vector cmp (sve)\n\t"
 737             "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
 738   ins_encode %{
 739     BasicType bt = Matcher::vector_element_basic_type(this);
 740     __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src3$$reg),
 741                    as_FloatRegister($src4$$reg), (int)$cond$$constant);
 742     __ sve_sel(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
 743                as_PRegister($pTmp$$reg), as_FloatRegister($src2$$reg),
 744                as_FloatRegister($src1$$reg));
 745   %}
 746   ins_pipe(pipe_slow);
 747 %}
 748 
 749 // vector load mask
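// VectorLoadMask turns a vector of 0/1 boolean bytes into a 0/-1 mask vector:
// the bytes are zero-extended to the element size with uunpklo where needed,
// then negated so that 1 becomes -1.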
 750 
 751 instruct vloadmaskB(vReg dst, vReg src) %{
 752   predicate(UseSVE > 0 &&
 753             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 754   match(Set dst (VectorLoadMask src));
 755   ins_cost(SVE_COST);
 756   format %{ "sve_neg $dst, $src\t# vector load mask (B)" %}
 757   ins_encode %{
 758     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, as_FloatRegister($src$$reg));
 759   %}
 760   ins_pipe(pipe_slow);
 761 %}
 762 
 763 instruct vloadmaskS(vReg dst, vReg src) %{
 764   predicate(UseSVE > 0 &&
 765             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 766   match(Set dst (VectorLoadMask src));
 767   ins_cost(2 * SVE_COST);
 768   format %{ "sve_uunpklo $dst, H, $src\n\t"
 769             "sve_neg $dst, $dst\t# vector load mask (B to H)" %}
 770   ins_encode %{
 771     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
 772     __ sve_neg(as_FloatRegister($dst$$reg), __ H, ptrue, as_FloatRegister($dst$$reg));
 773   %}
 774   ins_pipe(pipe_slow);
 775 %}
 776 
 777 instruct vloadmaskI(vReg dst, vReg src) %{
 778   predicate(UseSVE > 0 &&
 779             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
 780              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
 781   match(Set dst (VectorLoadMask src));
 782   ins_cost(3 * SVE_COST);
 783   format %{ "sve_uunpklo $dst, H, $src\n\t"
 784             "sve_uunpklo $dst, S, $dst\n\t"
 785             "sve_neg $dst, $dst\t# vector load mask (B to S)" %}
 786   ins_encode %{
 787     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
 788     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
 789     __ sve_neg(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg));
 790   %}
 791   ins_pipe(pipe_slow);
 792 %}
 793 
 794 instruct vloadmaskL(vReg dst, vReg src) %{
 795   predicate(UseSVE > 0 &&
 796             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
 797              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
 798   match(Set dst (VectorLoadMask src));
 799   ins_cost(4 * SVE_COST);
 800   format %{ "sve_uunpklo $dst, H, $src\n\t"
 801             "sve_uunpklo $dst, S, $dst\n\t"
 802             "sve_uunpklo $dst, D, $dst\n\t"
 803             "sve_neg $dst, $dst\t# vector load mask (B to D)" %}
 804   ins_encode %{
 805     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
 806     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
 807     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
 808     __ sve_neg(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg));
 809   %}
 810   ins_pipe(pipe_slow);
 811 %}
 812 
 813 // vector store mask
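// VectorStoreMask is the inverse: the mask elements are narrowed back to bytes
// with uzp1 (against a zero vector) where needed, then negated so that -1 becomes 1.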
 814 
 815 instruct vstoremaskB(vReg dst, vReg src, immI_1 size) %{
 816   predicate(UseSVE > 0);
 817   match(Set dst (VectorStoreMask src size));
 818   ins_cost(SVE_COST);
 819   format %{ "sve_neg $dst, $src\t# vector store mask (B)" %}
 820   ins_encode %{
 821     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
 822                as_FloatRegister($src$$reg));
 823   %}
 824   ins_pipe(pipe_slow);
 825 %}
 826 
 827 instruct vstoremaskS(vReg dst, vReg src, vReg tmp, immI_2 size) %{
 828   predicate(UseSVE > 0);
 829   match(Set dst (VectorStoreMask src size));
 830   effect(TEMP_DEF dst, TEMP tmp);
 831   ins_cost(3 * SVE_COST);
 832   format %{ "sve_dup $tmp, H, 0\n\t"
 833             "sve_uzp1 $dst, B, $src, $tmp\n\t"
 834             "sve_neg $dst, B, $dst\t# vector store mask (sve) (H to B)" %}
 835   ins_encode %{
 836     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
 837     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
 838                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
 839     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
 840                as_FloatRegister($dst$$reg));
 842   %}
 843   ins_pipe(pipe_slow);
 844 %}
 845 
 846 instruct vstoremaskI(vReg dst, vReg src, vReg tmp, immI_4 size) %{
 847   predicate(UseSVE > 0);
 848   match(Set dst (VectorStoreMask src size));
 849   effect(TEMP_DEF dst, TEMP tmp);
 850   ins_cost(4 * SVE_COST);
 851   format %{ "sve_dup $tmp, S, 0\n\t"
 852             "sve_uzp1 $dst, H, $src, $tmp\n\t"
 853             "sve_uzp1 $dst, B, $dst, $tmp\n\t"
 854             "sve_neg $dst, B, $dst\t# vector store mask (sve) (S to B)" %}
 855   ins_encode %{
 856     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
 857     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
 858                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
 859     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
 860                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
 861     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
 862                as_FloatRegister($dst$$reg));
 863   %}
 864   ins_pipe(pipe_slow);
 865 %}
 866 
 867 instruct vstoremaskL(vReg dst, vReg src, vReg tmp, immI_8 size) %{
 868   predicate(UseSVE > 0);
 869   match(Set dst (VectorStoreMask src size));
 870   effect(TEMP_DEF dst, TEMP tmp);
 871   ins_cost(5 * SVE_COST);
 872   format %{ "sve_dup $tmp, D, 0\n\t"
 873             "sve_uzp1 $dst, S, $src, $tmp\n\t"
 874             "sve_uzp1 $dst, H, $dst, $tmp\n\t"
 875             "sve_uzp1 $dst, B, $dst, $tmp\n\t"
 876             "sve_neg $dst, B, $dst\t# vector store mask (sve) (D to B)" %}
 877   ins_encode %{
 878     __ sve_dup(as_FloatRegister($tmp$$reg), __ D, 0);
 879     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S,
 880                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
 881     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
 882                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
 883     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
 884                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
 885     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
 886                as_FloatRegister($dst$$reg));
 887   %}
 888   ins_pipe(pipe_slow);
 889 %}
 890 dnl
 891 dnl
 892 dnl VLOADMASK_LOADV($1,    $2  )
 893 dnl VLOADMASK_LOADV(esize, cond)
 894 define(`VLOADMASK_LOADV', `
 895 instruct vloadmask_loadV_$1(vReg dst, ifelse($1, `byte', vmemA, indirect) mem) %{
 896   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize &&
 897             type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) $2);
 898   match(Set dst (VectorLoadMask (LoadVector mem)));
 899   ins_cost(5 * SVE_COST);
 900   format %{ "sve_ld1b $dst, $mem\n\t"
 901             "sve_neg $dst, $dst\t# load vector mask (sve)" %}
 902   ins_encode %{
 903     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 904     BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
 905     Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt);
 906     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
 907                           T_BOOLEAN, to_vect_bt, $mem->opcode(),
 908                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 909     __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg);
 910   %}
 911   ins_pipe(pipe_slow);
 912 %}')dnl
 913 dnl
 914 define(`ARGLIST',
 915 `ifelse($1, `byte', vmemA, indirect) mem, vReg src, vReg tmp, ifelse($1, `byte', immI_1, immI_gt_1) esize')
 916 dnl
dnl STOREV_VSTOREMASK($1   )
 918 dnl STOREV_VSTOREMASK(esize)
 919 define(`STOREV_VSTOREMASK', `
 920 instruct storeV_vstoremask_$1(ARGLIST($1)) %{
 921   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() *
 922                           n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize);
 923   match(Set mem (StoreVector mem (VectorStoreMask src esize)));
 924   effect(TEMP tmp);
 925   ins_cost(5 * SVE_COST);
 926   format %{ "sve_neg $tmp, $src\n\t"
 927             "sve_st1b $tmp, $mem\t# store vector mask (sve)" %}
 928   ins_encode %{
 929     BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
 930     assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type.");
 931     Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant);
 932     __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue,
 933                as_FloatRegister($src$$reg));
 934     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg),
 935                           ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(),
 936                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 937   %}
 938   ins_pipe(pipe_slow);
 939 %}')dnl
 940 undefine(ARGLIST)dnl
 941 dnl
 942 // load/store mask vector
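// Fused forms: a boolean vector loaded with ld1b is widened to the mask element
// size and negated to produce the 0/-1 mask directly; for stores, the mask is
// negated back to 0/1 and written out with st1b.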
 943 VLOADMASK_LOADV(byte, == 1)
 944 VLOADMASK_LOADV(non_byte, > 1)
 945 STOREV_VSTOREMASK(byte)
 946 STOREV_VSTOREMASK(non_byte)
 947 
 948 // vector add reduction
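// For byte and short element types the scalar result is sign-extended back to the
// subword type (sxtb/sxth), since the accumulate itself is done at int width.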
 949 
 950 instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
 951   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
 952   match(Set dst (AddReductionVI src1 src2));
 953   effect(TEMP_DEF dst, TEMP vtmp);
 954   ins_cost(SVE_COST);
 955   format %{ "sve_reduce_addI $dst, $src1, $src2\t# addB/S/I reduction (sve) (may extend)" %}
 956   ins_encode %{
 957     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
 958     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
 959     __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
 960     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
 961     __ addw($dst$$Register, $dst$$Register, $src1$$Register);
 962     if (bt == T_BYTE) {
 963       __ sxtb($dst$$Register, $dst$$Register);
 964     } else if (bt == T_SHORT) {
 965       __ sxth($dst$$Register, $dst$$Register);
 966     } else {
 967       assert(bt == T_INT, "unsupported type");
 968     }
 969   %}
 970   ins_pipe(pipe_slow);
 971 %}
 972 
 973 instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
 974                              pRegGov ptmp, rFlagsReg cr) %{
 975   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
 976   match(Set dst (AddReductionVI src1 src2));
 977   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
 978   ins_cost(SVE_COST);
 979   format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %}
 980   ins_encode %{
 981     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
 982     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
 983     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
 984                           Matcher::vector_length(this, $src2));
 985     __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant,
 986                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
 987     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
 988     __ addw($dst$$Register, $dst$$Register, $src1$$Register);
 989     if (bt == T_BYTE) {
 990       __ sxtb($dst$$Register, $dst$$Register);
 991     } else if (bt == T_SHORT) {
 992       __ sxth($dst$$Register, $dst$$Register);
 993     } else {
 994       assert(bt == T_INT, "unsupported type");
 995     }
 996   %}
 997   ins_pipe(pipe_slow);
 998 %}
 999 
1000 instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1001   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1002   match(Set dst (AddReductionVL src1 src2));
1003   effect(TEMP_DEF dst, TEMP vtmp);
1004   ins_cost(SVE_COST);
1005   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %}
1006   ins_encode %{
1007     __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1008     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1009     __ add($dst$$Register, $dst$$Register, $src1$$Register);
1010   %}
1011   ins_pipe(pipe_slow);
1012 %}
1013 
1014 instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1015                              pRegGov ptmp, rFlagsReg cr) %{
1016   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1017   match(Set dst (AddReductionVL src1 src2));
1018   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1019   ins_cost(SVE_COST);
1020   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %}
1021   ins_encode %{
1022     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1023                           Matcher::vector_length(this, $src2));
1024     __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D,
1025                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1026     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1027     __ add($dst$$Register, $dst$$Register, $src1$$Register);
1028   %}
1029   ins_pipe(pipe_slow);
1030 %}
1031 
1032 dnl
1033 dnl REDUCE_ADDF($1,        $2,      $3,      $4  )
1034 dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
1035 define(`REDUCE_ADDF', `
1036 instruct $1($3 src1_dst, vReg src2) %{
1037   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1038   match(Set src1_dst (AddReductionV$2 src1_dst src2));
1039   ins_cost(SVE_COST);
1040   format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
1041   ins_encode %{
1042     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
1043          ptrue, as_FloatRegister($src2$$reg));
1044   %}
1045   ins_pipe(pipe_slow);
1046 %}')dnl
1047 dnl
1048 dnl
1049 dnl REDUCE_ADDF_PARTIAL($1,        $2,     $3,      $4  )
1050 dnl REDUCE_ADDF_PARTIAL(insn_name, suffix, reg_dst, size)
1051 define(`REDUCE_ADDF_PARTIAL', `
1052 instruct $1($3 src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
1053   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1054   match(Set src1_dst (AddReductionV$2 src1_dst src2));
1055   ins_cost(SVE_COST);
1056   effect(TEMP ptmp, KILL cr);
1057   format %{ "sve_reduce_add$2 $src1_dst, $src1_dst, $src2\t# add$2 reduction partial (sve) ($4)" %}
1058   ins_encode %{
1059     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4,
1060                           Matcher::vector_length(this, $src2));
1061     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
1062                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1063   %}
1064   ins_pipe(pipe_slow);
1065 %}')dnl
1066 dnl
1067 REDUCE_ADDF(reduce_addF, F, vRegF, S)
1068 REDUCE_ADDF_PARTIAL(reduce_addF_partial, F, vRegF, S)
1069 REDUCE_ADDF(reduce_addD, D, vRegD, D)
1070 REDUCE_ADDF_PARTIAL(reduce_addD_partial, D, vRegD, D)
1071 
1072 // vector and reduction
1073 
1074 instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1075   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1076             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1077   match(Set dst (AndReductionV src1 src2));
1078   effect(TEMP_DEF dst, TEMP vtmp);
1079   ins_cost(SVE_COST);
1080   format %{ "sve_reduce_andI $dst, $src1, $src2\t# andB/S/I reduction (sve) (may extend)" %}
1081   ins_encode %{
1082     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1083     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1084     __ sve_andv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1085     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1086     __ andw($dst$$Register, $dst$$Register, $src1$$Register);
1087     if (bt == T_BYTE) {
1088       __ sxtb($dst$$Register, $dst$$Register);
1089     } else if (bt == T_SHORT) {
1090       __ sxth($dst$$Register, $dst$$Register);
1091     } else {
1092       assert(bt == T_INT, "unsupported type");
1093     }
1094   %}
1095   ins_pipe(pipe_slow);
1096 %}
1097 
1098 instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1099                              pRegGov ptmp, rFlagsReg cr) %{
1100   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1101             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1102   match(Set dst (AndReductionV src1 src2));
1103   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1104   ins_cost(SVE_COST);
1105   format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %}
1106   ins_encode %{
1107     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1108     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1109     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1110                           Matcher::vector_length(this, $src2));
1111     __ sve_andv(as_FloatRegister($vtmp$$reg), variant,
1112                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1113     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1114     __ andw($dst$$Register, $dst$$Register, $src1$$Register);
1115     if (bt == T_BYTE) {
1116       __ sxtb($dst$$Register, $dst$$Register);
1117     } else if (bt == T_SHORT) {
1118       __ sxth($dst$$Register, $dst$$Register);
1119     } else {
1120       assert(bt == T_INT, "unsupported type");
1121     }
1122   %}
1123   ins_pipe(pipe_slow);
1124 %}
1125 
1126 instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1127   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1128             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1129   match(Set dst (AndReductionV src1 src2));
1130   effect(TEMP_DEF dst, TEMP vtmp);
1131   ins_cost(SVE_COST);
1132   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %}
1133   ins_encode %{
1134     __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1135     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1136     __ andr($dst$$Register, $dst$$Register, $src1$$Register);
1137   %}
1138   ins_pipe(pipe_slow);
1139 %}
1140 
1141 instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1142                              pRegGov ptmp, rFlagsReg cr) %{
1143   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1144             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1145   match(Set dst (AndReductionV src1 src2));
1146   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1147   ins_cost(SVE_COST);
1148   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %}
1149   ins_encode %{
1150     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1151                           Matcher::vector_length(this, $src2));
1152     __ sve_andv(as_FloatRegister($vtmp$$reg), __ D,
1153                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1154     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1155     __ andr($dst$$Register, $dst$$Register, $src1$$Register);
1156   %}
1157   ins_pipe(pipe_slow);
1158 %}
1159 
1160 // vector or reduction
1161 
1162 instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1163   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1164             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1165   match(Set dst (OrReductionV src1 src2));
1166   effect(TEMP_DEF dst, TEMP vtmp);
1167   ins_cost(SVE_COST);
1168   format %{ "sve_reduce_orI $dst, $src1, $src2\t# orB/S/I reduction (sve) (may extend)" %}
1169   ins_encode %{
1170     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1171     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1172     __ sve_orv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1173     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1174     __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
1175     if (bt == T_BYTE) {
1176       __ sxtb($dst$$Register, $dst$$Register);
1177     } else if (bt == T_SHORT) {
1178       __ sxth($dst$$Register, $dst$$Register);
1179     } else {
1180       assert(bt == T_INT, "unsupported type");
1181     }
1182   %}
1183   ins_pipe(pipe_slow);
1184 %}
1185 
1186 instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1187                              pRegGov ptmp, rFlagsReg cr) %{
1188   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1189             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1190   match(Set dst (OrReductionV src1 src2));
1191   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1192   ins_cost(SVE_COST);
1193   format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %}
1194   ins_encode %{
1195     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1196     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1197     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1198                           Matcher::vector_length(this, $src2));
1199     __ sve_orv(as_FloatRegister($vtmp$$reg), variant,
1200                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1201     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1202     __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
1203     if (bt == T_BYTE) {
1204       __ sxtb($dst$$Register, $dst$$Register);
1205     } else if (bt == T_SHORT) {
1206       __ sxth($dst$$Register, $dst$$Register);
1207     } else {
1208       assert(bt == T_INT, "unsupported type");
1209     }
1210   %}
1211   ins_pipe(pipe_slow);
1212 %}
1213 
1214 instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1215   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1216             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1217   match(Set dst (OrReductionV src1 src2));
1218   effect(TEMP_DEF dst, TEMP vtmp);
1219   ins_cost(SVE_COST);
1220   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %}
1221   ins_encode %{
1222     __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1223     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1224     __ orr($dst$$Register, $dst$$Register, $src1$$Register);
1225   %}
1226   ins_pipe(pipe_slow);
1227 %}
1228 
1229 instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1230                              pRegGov ptmp, rFlagsReg cr) %{
1231   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1232             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1233   match(Set dst (OrReductionV src1 src2));
1234   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1235   ins_cost(SVE_COST);
1236   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %}
1237   ins_encode %{
1238     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1239                           Matcher::vector_length(this, $src2));
1240     __ sve_orv(as_FloatRegister($vtmp$$reg), __ D,
1241                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1242     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1243     __ orr($dst$$Register, $dst$$Register, $src1$$Register);
1244   %}
1245   ins_pipe(pipe_slow);
1246 %}
1247 
1248 // vector xor reduction
1249 
1250 instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1251   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1252             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1253   match(Set dst (XorReductionV src1 src2));
1254   effect(TEMP_DEF dst, TEMP vtmp);
1255   ins_cost(SVE_COST);
  format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorB/S/I reduction (sve) (may extend)" %}
1257   ins_encode %{
1258     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1259     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1260     __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1261     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1262     __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
1263     if (bt == T_BYTE) {
1264       __ sxtb($dst$$Register, $dst$$Register);
1265     } else if (bt == T_SHORT) {
1266       __ sxth($dst$$Register, $dst$$Register);
1267     } else {
1268       assert(bt == T_INT, "unsupported type");
1269     }
1270   %}
1271   ins_pipe(pipe_slow);
1272 %}
1273 
1274 instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1275                              pRegGov ptmp, rFlagsReg cr) %{
1276   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1277             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1278   match(Set dst (XorReductionV src1 src2));
1279   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1280   ins_cost(SVE_COST);
1281   format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorI reduction partial (sve) (may extend)" %}
1282   ins_encode %{
1283     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1284     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1285     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1286                           Matcher::vector_length(this, $src2));
1287     __ sve_eorv(as_FloatRegister($vtmp$$reg), variant,
1288                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1289     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1290     __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
1291     if (bt == T_BYTE) {
1292       __ sxtb($dst$$Register, $dst$$Register);
1293     } else if (bt == T_SHORT) {
1294       __ sxth($dst$$Register, $dst$$Register);
1295     } else {
1296       assert(bt == T_INT, "unsupported type");
1297     }
1298   %}
1299   ins_pipe(pipe_slow);
1300 %}
1301 
1302 instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1303   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1304             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1305   match(Set dst (XorReductionV src1 src2));
1306   effect(TEMP_DEF dst, TEMP vtmp);
1307   ins_cost(SVE_COST);
1308   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction (sve)" %}
1309   ins_encode %{
1310     __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1311     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1312     __ eor($dst$$Register, $dst$$Register, $src1$$Register);
1313   %}
1314   ins_pipe(pipe_slow);
1315 %}
1316 
1317 instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1318                              pRegGov ptmp, rFlagsReg cr) %{
1319   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1320             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1321   match(Set dst (XorReductionV src1 src2));
1322   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1323   ins_cost(SVE_COST);
1324   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction partial (sve)" %}
1325   ins_encode %{
1326     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1327                           Matcher::vector_length(this, $src2));
1328     __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D,
1329                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1330     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1331     __ eor($dst$$Register, $dst$$Register, $src1$$Register);
1332   %}
1333   ins_pipe(pipe_slow);
1334 %}
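// Note on the *_partial reduction rules above: sve_whilelo_zr_imm builds a governing
// predicate covering only the first vector_length lanes, so the predicated reduction
// (sve_orv/sve_eorv) ignores lanes beyond the partial vector before the scalar input
// is folded in with a plain orr/eor.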
1335 
1336 dnl
1337 dnl REDUCE_MAXMIN_I($1,      $2,      $3 )
1338 dnl REDUCE_MAXMIN_I(min_max, op_name, cmp)
1339 define(`REDUCE_MAXMIN_I', `
1340 instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1341   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1342             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
1343              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
1344              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
1345   match(Set dst ($2 src1 src2));
1346   effect(TEMP_DEF dst, TEMP vtmp);
1347   ins_cost(SVE_COST);
1348   format %{ "sve_reduce_$1I $dst, $src1, $src2\t# reduce $1B/S/I (sve)" %}
1349   ins_encode %{
1350     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1351     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1352     __ sve_s$1v(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1353     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1354     __ cmpw($dst$$Register, $src1$$Register);
1355     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
1356   %}
1357   ins_pipe(pipe_slow);
1358 %}')dnl
1359 dnl
1360 dnl REDUCE_MAXMIN_L($1,      $2,      $3 )
1361 dnl REDUCE_MAXMIN_L(min_max, op_name, cmp)
1362 define(`REDUCE_MAXMIN_L', `
1363 instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1364   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1365             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1366   match(Set dst ($2 src1 src2));
1367   effect(TEMP_DEF dst, TEMP vtmp);
1368   ins_cost(SVE_COST);
1369   format %{ "sve_reduce_$1L $dst, $src1, $src2\t# reduce $1L partial (sve)" %}
1370   ins_encode %{
1371     __ sve_s$1v(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1372     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1373     __ cmp($dst$$Register, $src1$$Register);
1374     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
1375   %}
1376   ins_pipe(pipe_slow);
1377 %}')dnl
1378 dnl
1379 dnl REDUCE_MAXMIN_I_PARTIAL($1,      $2,      $3 )
1380 dnl REDUCE_MAXMIN_I_PARTIAL(min_max, op_name, cmp)
1381 define(`REDUCE_MAXMIN_I_PARTIAL', `
1382 instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1383                              pRegGov ptmp, rFlagsReg cr) %{
1384   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1385             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
1386              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
1387              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
1388   match(Set dst ($2 src1 src2));
1389   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1390   ins_cost(SVE_COST);
1391   format %{ "sve_reduce_$1I $dst, $src1, $src2\t# reduce $1I partial (sve)" %}
1392   ins_encode %{
1393     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1394     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1395     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1396                           Matcher::vector_length(this, $src2));
1397     __ sve_s$1v(as_FloatRegister($vtmp$$reg), variant,
1398                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1399     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1400     __ cmpw($dst$$Register, $src1$$Register);
1401     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
1402   %}
1403   ins_pipe(pipe_slow);
1404 %}')dnl
1405 dnl
1406 dnl REDUCE_MAXMIN_L_PARTIAL($1,      $2,      $3 )
1407 dnl REDUCE_MAXMIN_L_PARTIAL(min_max, op_name, cmp)
1408 define(`REDUCE_MAXMIN_L_PARTIAL', `
1409 instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1410                              pRegGov ptmp, rFlagsReg cr) %{
1411   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1412             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1413   match(Set dst ($2 src1 src2));
1414   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1415   ins_cost(SVE_COST);
1416   format %{ "sve_reduce_$1L $dst, $src1, $src2\t# reduce $1L partial (sve)" %}
1417   ins_encode %{
1418     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1419                           Matcher::vector_length(this, $src2));
1420     __ sve_s$1v(as_FloatRegister($vtmp$$reg), __ D,
1421                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1422     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1423     __ cmp($dst$$Register, $src1$$Register);
1424     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::$3);
1425   %}
1426   ins_pipe(pipe_slow);
1427 %}')dnl
1428 dnl
1429 dnl REDUCE_FMINMAX($1,      $2,          $3,           $4,   $5         )
1430 dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
1431 define(`REDUCE_FMINMAX', `
1432 instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
1433   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
1434             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1435   match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
1436   ins_cost(INSN_COST);
1437   effect(TEMP_DEF dst);
1438   format %{ "sve_f$1v $dst, $src2 # vector (sve) ($4)\n\t"
1439             "f$1s $dst, $dst, $src1\t# $1 reduction $2" %}
1440   ins_encode %{
1441     __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
1442          ptrue, as_FloatRegister($src2$$reg));
1443     __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1444   %}
1445   ins_pipe(pipe_slow);
1446 %}')dnl
1447 dnl
1448 dnl
1449 dnl REDUCE_FMINMAX_PARTIAL($1,      $2,          $3,           $4,   $5         )
1450 dnl REDUCE_FMINMAX_PARTIAL(min_max, name_suffix, element_type, size, reg_src_dst)
1451 define(`REDUCE_FMINMAX_PARTIAL', `
1452 instruct reduce_$1$2_partial($5 dst, $5 src1, vReg src2,
1453                              pRegGov ptmp, rFlagsReg cr) %{
1454   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
1455             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1456   match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
1457   ins_cost(INSN_COST);
1458   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
1459   format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# reduce $1 $4 partial (sve)" %}
1460   ins_encode %{
1461     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ $4,
1462                           Matcher::vector_length(this, $src2));
1463     __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
1464          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1465     __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1466   %}
1467   ins_pipe(pipe_slow);
1468 %}')dnl
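// The integer max/min reductions below reduce the vector with sve_smaxv/sve_sminv,
// move lane 0 to a general register, and fold in the scalar input with a compare and
// conditional select (cmpw/cselw for the int forms, cmp/csel for long). The
// floating-point forms reduce with sve_fmaxv/sve_fminv and combine with the scalar
// via fmaxs/fmins (fmaxd/fmind for doubles).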
1469 
1470 // vector max reduction
1471 REDUCE_MAXMIN_I(max, MaxReductionV, GT)
1472 REDUCE_MAXMIN_I_PARTIAL(max, MaxReductionV, GT)
1473 REDUCE_MAXMIN_L(max, MaxReductionV, GT)
1474 REDUCE_MAXMIN_L_PARTIAL(max, MaxReductionV, GT)
1475 REDUCE_FMINMAX(max, F, T_FLOAT,  S, vRegF)
1476 REDUCE_FMINMAX_PARTIAL(max, F, T_FLOAT,  S, vRegF)
1477 REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
1478 REDUCE_FMINMAX_PARTIAL(max, D, T_DOUBLE, D, vRegD)
1479 
1480 // vector min reduction
1481 REDUCE_MAXMIN_I(min, MinReductionV, LT)
1482 REDUCE_MAXMIN_I_PARTIAL(min, MinReductionV, LT)
1483 REDUCE_MAXMIN_L(min, MinReductionV, LT)
1484 REDUCE_MAXMIN_L_PARTIAL(min, MinReductionV, LT)
1485 REDUCE_FMINMAX(min, F, T_FLOAT,  S, vRegF)
1486 REDUCE_FMINMAX_PARTIAL(min, F, T_FLOAT,  S, vRegF)
1487 REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
1488 REDUCE_FMINMAX_PARTIAL(min, D, T_DOUBLE, D, vRegD)
1489 
1490 // vector Math.rint, floor, ceil
1491 
1492 instruct vroundD(vReg dst, vReg src, immI rmode) %{
1493   predicate(UseSVE > 0 &&
1494             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
1495   match(Set dst (RoundDoubleModeV src rmode));
1496   format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
1497   ins_encode %{
1498     switch ($rmode$$constant) {
1499       case RoundDoubleModeNode::rmode_rint:
1500         __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
1501              ptrue, as_FloatRegister($src$$reg));
1502         break;
1503       case RoundDoubleModeNode::rmode_floor:
1504         __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
1505              ptrue, as_FloatRegister($src$$reg));
1506         break;
1507       case RoundDoubleModeNode::rmode_ceil:
1508         __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
1509              ptrue, as_FloatRegister($src$$reg));
1510         break;
1511     }
1512   %}
1513   ins_pipe(pipe_slow);
1514 %}
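// RoundDoubleModeV maps directly onto the SVE round-to-integral instructions:
// rmode_rint -> frintn (to nearest, ties to even), rmode_floor -> frintm (towards
// minus infinity), rmode_ceil -> frintp (towards plus infinity).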
1515 dnl
1516 dnl REPLICATE($1,        $2,      $3,      $4,   $5         )
1517 dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
1518 define(`REPLICATE', `
1519 instruct $1(vReg dst, $3 src) %{
1520   predicate(UseSVE > 0);
1521   match(Set dst ($2 src));
1522   ins_cost(SVE_COST);
1523   format %{ "sve_dup  $dst, $src\t# vector (sve) ($4)" %}
1524   ins_encode %{
1525     __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
1526   %}
1527   ins_pipe(pipe_slow);
1528 %}')dnl
1529 dnl
1530 dnl REPLICATE_IMM8($1,        $2,      $3,       $4,   $5         )
1531 dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
1532 define(`REPLICATE_IMM8', `
1533 instruct $1(vReg dst, $3 con) %{
1534   predicate(UseSVE > 0);
1535   match(Set dst ($2 con));
1536   ins_cost(SVE_COST);
1537   format %{ "sve_dup  $dst, $con\t# vector (sve) ($4)" %}
1538   ins_encode %{
1539     __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
1540   %}
1541   ins_pipe(pipe_slow);
1542 %}')dnl
1543 dnl
1544 dnl FREPLICATE($1,        $2,      $3,      $4,   $5         )
1545 dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
1546 define(`FREPLICATE', `
1547 instruct $1(vReg dst, $3 src) %{
1548   predicate(UseSVE > 0);
1549   match(Set dst ($2 src));
1550   ins_cost(SVE_COST);
1551   format %{ "sve_cpy  $dst, $src\t# vector (sve) ($4)" %}
1552   ins_encode %{
1553     __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
1554          ptrue, as_FloatRegister($src$$reg));
1555   %}
1556   ins_pipe(pipe_slow);
1557 %}')dnl
1558 
1559 // vector replicate
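// sve_dup broadcasts a general-purpose register or an 8-bit signed immediate into
// every lane; the floating-point replicates use a ptrue-governed sve_cpy from the
// FP scalar register instead.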
1560 REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
1561 REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
1562 REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
1563 REPLICATE(replicateL, ReplicateL, iRegL,      D, 2)
1564 REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8,        B, 16)
1565 REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
1566 REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
1567 REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
1568 FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
1569 FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
1570 dnl
1571 dnl VSHIFT_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
1572 dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
1573 define(`VSHIFT_TRUE_PREDICATE', `
1574 instruct $1(vReg dst, vReg shift) %{
1575   predicate(UseSVE > 0);
1576   match(Set dst ($2 dst shift));
1577   ins_cost(SVE_COST);
1578   format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
1579   ins_encode %{
1580     __ $5(as_FloatRegister($dst$$reg), __ $3,
1581          ptrue, as_FloatRegister($shift$$reg));
1582   %}
1583   ins_pipe(pipe_slow);
1584 %}')dnl
1585 dnl
1586 dnl VSHIFT_IMM_UNPREDICATED($1,        $2,      $3,       $4,   $5,          $6  )
1587 dnl VSHIFT_IMM_UNPREDICATED(insn_name, op_name, op_name2, size, min_vec_len, insn)
1588 define(`VSHIFT_IMM_UNPREDICATED', `
1589 instruct $1(vReg dst, vReg src, immI shift) %{
1590   predicate(UseSVE > 0);
1591   match(Set dst ($2 src ($3 shift)));
1592   ins_cost(SVE_COST);
1593   format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
1594   ins_encode %{
1595     int con = (int)$shift$$constant;dnl
1596 ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
1597     if (con == 0) {
1598       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1599            as_FloatRegister($src$$reg));
1600       return;
1601     }')dnl
1602 ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
1603     if (con >= 8) con = 7;')ifelse(eval(index(`$4', `H') == 0), 1, `
1604     if (con >= 16) con = 15;')')dnl
1605 ifelse(eval(index(`$1', `vlsl') == 0  || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
1606     if (con >= 8) {
1607       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1608            as_FloatRegister($src$$reg));
1609       return;
1610     }')ifelse(eval(index(`$4', `H') == 0), 1, `
1611     if (con >= 16) {
1612       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1613            as_FloatRegister($src$$reg));
1614       return;
1615     }')')
1616     __ $6(as_FloatRegister($dst$$reg), __ $4,
1617          as_FloatRegister($src$$reg), con);
1618   %}
1619   ins_pipe(pipe_slow);
1620 %}')dnl
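dnl The generated immediate-shift encodings normalize out-of-range counts: a zero
dnl shift for asr/lsr degenerates to a register copy (sve_orr of src with itself),
dnl an arithmetic right shift by >= the element width is clamped to (width - 1), and
dnl a logical shift (lsl/lsr) by >= the element width zeroes the destination via
dnl sve_eor of src with itself.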
1621 dnl
1622 dnl VSHIFT_COUNT($1,        $2,   $3,          $4  )
1623 dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
1624 define(`VSHIFT_COUNT', `
1625 instruct $1(vReg dst, iRegIorL2I cnt) %{
1626   predicate(UseSVE > 0 &&
1627             ELEMENT_SHORT_CHAR($4, n));
1628   match(Set dst (LShiftCntV cnt));
1629   match(Set dst (RShiftCntV cnt));
1630   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
1631   ins_encode %{
1632     __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
1633   %}
1634   ins_pipe(pipe_slow);
1635 %}')dnl
1636 
1637 // vector shift
1638 VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB,  B, 16, sve_asr)
1639 VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS,  H,  8, sve_asr)
1640 VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI,  S,  4, sve_asr)
1641 VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL,  D,  2, sve_asr)
1642 VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB,  B, 16, sve_lsl)
1643 VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS,  H,  8, sve_lsl)
1644 VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI,  S,  4, sve_lsl)
1645 VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL,  D,  2, sve_lsl)
1646 VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
1647 VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H,  8, sve_lsr)
1648 VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S,  4, sve_lsr)
1649 VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D,  2, sve_lsr)
1650 VSHIFT_IMM_UNPREDICATED(vasrB_imm, RShiftVB,  RShiftCntV, B, 16, sve_asr)
1651 VSHIFT_IMM_UNPREDICATED(vasrS_imm, RShiftVS,  RShiftCntV, H,  8, sve_asr)
1652 VSHIFT_IMM_UNPREDICATED(vasrI_imm, RShiftVI,  RShiftCntV, S,  4, sve_asr)
1653 VSHIFT_IMM_UNPREDICATED(vasrL_imm, RShiftVL,  RShiftCntV, D,  2, sve_asr)
1654 VSHIFT_IMM_UNPREDICATED(vlsrB_imm, URShiftVB, RShiftCntV, B, 16, sve_lsr)
1655 VSHIFT_IMM_UNPREDICATED(vlsrS_imm, URShiftVS, RShiftCntV, H,  8, sve_lsr)
1656 VSHIFT_IMM_UNPREDICATED(vlsrI_imm, URShiftVI, RShiftCntV, S,  4, sve_lsr)
1657 VSHIFT_IMM_UNPREDICATED(vlsrL_imm, URShiftVL, RShiftCntV, D,  2, sve_lsr)
1658 VSHIFT_IMM_UNPREDICATED(vlslB_imm, LShiftVB,  LShiftCntV, B, 16, sve_lsl)
1659 VSHIFT_IMM_UNPREDICATED(vlslS_imm, LShiftVS,  LShiftCntV, H,  8, sve_lsl)
1660 VSHIFT_IMM_UNPREDICATED(vlslI_imm, LShiftVI,  LShiftCntV, S,  4, sve_lsl)
1661 VSHIFT_IMM_UNPREDICATED(vlslL_imm, LShiftVL,  LShiftCntV, D,  2, sve_lsl)
1662 VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
1663 VSHIFT_COUNT(vshiftcntS, H,  8, T_SHORT)
1664 VSHIFT_COUNT(vshiftcntI, S,  4, T_INT)
1665 VSHIFT_COUNT(vshiftcntL, D,  2, T_LONG)
1666 
1667 // vector sqrt
1668 UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt)
1669 UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt)
1670 
1671 // vector sub
1672 BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub)
1673 BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub)
1674 BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
1675 BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
1676 BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
1677 BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)
1678 
1679 // vector mask cast
1680 
1681 instruct vmaskcast(vReg dst) %{
1682   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
1683             n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
1684   match(Set dst (VectorMaskCast dst));
1685   ins_cost(0);
1686   format %{ "vmaskcast $dst\t# empty (sve)" %}
1687   ins_encode %{
1688     // empty
1689   %}
1690   ins_pipe(pipe_class_empty);
1691 %}
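// VectorMaskCast is a no-op here: the predicate guarantees that source and destination
// masks have the same lane count and the same size in bytes, so the register contents
// can be reused unchanged.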
1692 
1693 // ------------------------------ Vector cast -------------------------------
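// The cast rules are generated from a few shapes: VECTOR_CAST_EXTENDn widens with n
// sunpklo steps, VECTOR_CAST_NARROWn narrows by interleaving with a zero vector
// (dup 0 followed by uzp1 chains), and the I2F/X2F/F2X shapes add the appropriate
// scvtf/fcvtzs/fcvt conversion before or after the resize. The concrete
// instantiations follow the macro definitions.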
1694 dnl
1695 dnl
1696 define(`VECTOR_CAST_EXTEND1', `
1697 instruct vcvt$1to$2`'(vReg dst, vReg src)
1698 %{
1699   predicate(UseSVE > 0 &&
1700             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1701   match(Set dst (VectorCast$1`'2X src));
1702   ins_cost(SVE_COST);
1703   format %{ "sve_$3  $dst, $4, $src\t# convert $1 to $2 vector" %}
1704   ins_encode %{
1705     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
1706   %}
1707   ins_pipe(pipe_slow);
1708 %}')dnl
1709 dnl
1710 dnl
1711 define(`VECTOR_CAST_EXTEND2', `
1712 instruct vcvt$1to$2`'(vReg dst, vReg src)
1713 %{
1714   predicate(UseSVE > 0 &&
1715             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1716   match(Set dst (VectorCast$1`'2X src));
1717   ins_cost(2 * SVE_COST);
1718   format %{ "sve_$3  $dst, $4, $src\n\t"
1719             "sve_$3  $dst, $5, $dst\t# convert $1 to $2 vector" %}
1720   ins_encode %{
1721     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
1722     __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
1723   %}
1724   ins_pipe(pipe_slow);
1725 %}')dnl
1726 dnl
1727 dnl
1728 define(`VECTOR_CAST_EXTEND3', `
1729 instruct vcvt$1to$2`'(vReg dst, vReg src)
1730 %{
1731   predicate(UseSVE > 0 &&
1732             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1733   match(Set dst (VectorCast$1`'2X src));
1734   ins_cost(3 * SVE_COST);
1735   format %{ "sve_$3  $dst, $4, $src\n\t"
1736             "sve_$3  $dst, $5, $dst\n\t"
1737             "sve_$3  $dst, $6, $dst\t# convert $1 to $2 vector" %}
1738   ins_encode %{
1739     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
1740     __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
1741     __ sve_$3(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg));
1742   %}
1743   ins_pipe(pipe_slow);
1744 %}')dnl
1745 dnl
1746 dnl
1747 define(`VECTOR_CAST_NARROW1', `
1748 instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
1749 %{
1750   predicate(UseSVE > 0 &&
1751             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1752   match(Set dst (VectorCast$1`'2X src));
1753   effect(TEMP tmp);
1754   ins_cost(2 * SVE_COST);
1755   format %{ "sve_$3  $tmp, $4, 0\n\t"
1756             "sve_$5  $dst, $4, $src, tmp\t# convert $1 to $2 vector" %}
1757   ins_encode %{
1758     __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0);
1759     __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
1760   %}
1761   ins_pipe(pipe_slow);
1762 %}')dnl
1763 dnl
1764 dnl
1765 define(`VECTOR_CAST_NARROW2', `
1766 instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
1767 %{
1768   predicate(UseSVE > 0 &&
1769             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1770   match(Set dst (VectorCast$1`'2X src));
1771   effect(TEMP_DEF dst, TEMP tmp);
1772   ins_cost(3 * SVE_COST);
1773   format %{ "sve_$3  $tmp, $4, 0\n\t"
1774             "sve_$5  $dst, $4, $src, tmp\n\t"
1775             "sve_$5  $dst, $6, $dst, tmp\n\t# convert $1 to $2 vector" %}
1776   ins_encode %{
1777     __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0);
1778     __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
1779     __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1780   %}
1781   ins_pipe(pipe_slow);
1782 %}')dnl
1783 dnl
1784 dnl
1785 define(`VECTOR_CAST_NARROW3', `
1786 instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
1787 %{
1788   predicate(UseSVE > 0 &&
1789             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1790   match(Set dst (VectorCast$1`'2X src));
1791   effect(TEMP_DEF dst, TEMP tmp);
1792   ins_cost(4 * SVE_COST);
1793   format %{ "sve_$3  $tmp, $4, 0\n\t"
1794             "sve_$5  $dst, $4, $src, tmp\n\t"
1795             "sve_$5  $dst, $6, $dst, tmp\n\t"
1796             "sve_$5  $dst, $7, $dst, tmp\n\t# convert $1 to $2 vector" %}
1797   ins_encode %{
1798     __ sve_$3(as_FloatRegister($tmp$$reg), __ $4, 0);
1799     __ sve_$5(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
1800     __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1801     __ sve_$5(as_FloatRegister($dst$$reg), __ $7, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1802   %}
1803   ins_pipe(pipe_slow);
1804 %}')dnl
1805 dnl
1806 dnl
1807 define(`VECTOR_CAST_I2F_EXTEND2', `
1808 instruct vcvt$1to$2`'(vReg dst, vReg src)
1809 %{
1810   predicate(UseSVE > 0 &&
1811             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1812   match(Set dst (VectorCast$1`'2X src));
1813   ins_cost(3 * SVE_COST);
1814   format %{ "sve_$3  $dst, $4, $src\n\t"
1815             "sve_$3  $dst, $5, $dst\n\t"
1816             "sve_$6  $dst, $5, $dst, $5\t# convert $1 to $2 vector" %}
1817   ins_encode %{
1818     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
1819     __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
1820     __ sve_$6(as_FloatRegister($dst$$reg), __ $5, ptrue, as_FloatRegister($dst$$reg), __ $5);
1821   %}
1822   ins_pipe(pipe_slow);
1823 %}')dnl
1824 dnl
1825 dnl
1826 define(`VECTOR_CAST_I2F_EXTEND3', `
1827 instruct vcvt$1to$2`'(vReg dst, vReg src)
1828 %{
1829   predicate(UseSVE > 0 &&
1830             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1831   match(Set dst (VectorCast$1`'2X src));
1832   ins_cost(4 * SVE_COST);
1833   format %{ "sve_$3  $dst, $4, $src\n\t"
1834             "sve_$3  $dst, $5, $dst\n\t"
1835             "sve_$3  $dst, $6, $dst\n\t"
1836             "sve_$7  $dst, $6, $dst, $6\t# convert $1 to $2 vector" %}
1837   ins_encode %{
1838     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
1839     __ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
1840     __ sve_$3(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg));
1841     __ sve_$7(as_FloatRegister($dst$$reg), __ $6, ptrue, as_FloatRegister($dst$$reg), __ $6);
1842   %}
1843   ins_pipe(pipe_slow);
1844 %}')dnl
1845 dnl
1846 dnl
1847 define(`VECTOR_CAST_X2F_NARROW1', `
1848 instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
1849 %{
1850   predicate(UseSVE > 0 &&
1851             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1852   match(Set dst (VectorCast$1`'2X src));
1853   effect(TEMP_DEF dst, TEMP tmp);
1854   ins_cost(3 * SVE_COST);
1855   format %{ "sve_$3  $dst, $4, $src, $5\n\t"
1856             "sve_$6  $tmp, $7, 0\n\t"
1857             "sve_$8  $dst, $7, $dst, $tmp\t# convert $1 to $2 vector" %}
1858   ins_encode %{
1859     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $5);
1860     __ sve_$6(as_FloatRegister($tmp$$reg), __ $7, 0);
1861     __ sve_$8(as_FloatRegister($dst$$reg), __ $7, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1862   %}
1863   ins_pipe(pipe_slow);
1864 %}')dnl
1865 dnl
1866 dnl
1867 define(`VECTOR_CAST_X2X', `
1868 instruct vcvt$1to$2`'(vReg dst, vReg src)
1869 %{
1870   predicate(UseSVE > 0 &&
1871             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1872   match(Set dst (VectorCast$1`'2X src));
1873   ins_cost(SVE_COST);
1874   format %{ "sve_$3  $dst, $4, $src, $4\t# convert $1 to $2 vector" %}
1875   ins_encode %{
1876     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
1877   %}
1878   ins_pipe(pipe_slow);
1879 %}')dnl
1880 dnl
1881 dnl
1882 define(`VECTOR_CAST_X2F_EXTEND1', `
1883 instruct vcvt$1to$2`'(vReg dst, vReg src)
1884 %{
1885   predicate(UseSVE > 0 &&
1886             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1887   match(Set dst (VectorCast$1`'2X src));
1888   ins_cost(2 * SVE_COST);
1889   format %{ "sve_$3  $dst, $4, $src\n\t"
1890             "sve_$5  $dst, $4, $dst, $6\t# convert $1 to $2 vector" %}
1891   ins_encode %{
1892     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
1893     __ sve_$5(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($dst$$reg), __ $6);
1894   %}
1895   ins_pipe(pipe_slow);
1896 %}')dnl
1897 dnl
1898 dnl
1899 define(`VECTOR_CAST_F2X_NARROW1', `
1900 instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
1901 %{
1902   predicate(UseSVE > 0 &&
1903             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1904   match(Set dst (VectorCast$1`'2X src));
1905   effect(TEMP_DEF dst, TEMP tmp);
1906   ins_cost(3 * SVE_COST);
1907   format %{ "sve_$3  $dst, $4, $src, $4\n\t"
1908             "sve_$5  $tmp, $6, 0\n\t"
1909             "sve_$7  $dst, $6, $dst, tmp\t# convert $1 to $2 vector" %}
1910   ins_encode %{
1911     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
1912     __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
1913     __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1914   %}
1915   ins_pipe(pipe_slow);
1916 %}')dnl
1917 dnl
1918 dnl
1919 define(`VECTOR_CAST_F2X_NARROW2', `
1920 instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
1921 %{
1922   predicate(UseSVE > 0 &&
1923             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1924   match(Set dst (VectorCast$1`'2X src));
1925   effect(TEMP_DEF dst, TEMP tmp);
1926   ins_cost(4 * SVE_COST);
1927   format %{ "sve_$3  $dst, $4, $src, $4\n\t"
1928             "sve_$5  $tmp, $6, 0\n\t"
1929             "sve_$7  $dst, $6, $dst, tmp\n\t"
1930             "sve_$7  $dst, $8, $dst, tmp\n\t# convert $1 to $2 vector" %}
1931   ins_encode %{
1932     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
1933     __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
1934     __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1935     __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1936   %}
1937   ins_pipe(pipe_slow);
1938 %}')dnl
1939 dnl
1940 dnl
1941 define(`VECTOR_CAST_F2X_EXTEND1', `
1942 instruct vcvt$1to$2`'(vReg dst, vReg src)
1943 %{
1944   predicate(UseSVE > 0 &&
1945             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1946   match(Set dst (VectorCast$1`'2X src));
1947   ins_cost(2 * SVE_COST);
1948   format %{ "sve_$3  $dst, $4, $src, $4\n\t"
1949             "sve_$5  $dst, $6, $dst\t# convert $1 to $2 vector" %}
1950   ins_encode %{
1951     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
1952     __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg));
1953   %}
1954   ins_pipe(pipe_slow);
1955 %}')dnl
1956 dnl
1957 dnl
1958 define(`VECTOR_CAST_F2X_NARROW3', `
1959 instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
1960 %{
1961   predicate(UseSVE > 0 &&
1962             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
1963   match(Set dst (VectorCast$1`'2X src));
1964   effect(TEMP_DEF dst, TEMP tmp);
1965   ins_cost(5 * SVE_COST);
1966   format %{ "sve_$3  $dst, $4, $src, $4\n\t"
1967             "sve_$5  $tmp, $6, 0\n\t"
1968             "sve_$7  $dst, $6, $dst, tmp\n\t"
1969             "sve_$7  $dst, $8, $dst, tmp\n\t"
1970             "sve_$7  $dst, $9, $dst, tmp\n\t# convert $1 to $2 vector" %}
1971   ins_encode %{
1972     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
1973     __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
1974     __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1975     __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1976     __ sve_$7(as_FloatRegister($dst$$reg), __ $9, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1977   %}
1978   ins_pipe(pipe_slow);
1979 %}')dnl
1980 dnl
1981 VECTOR_CAST_EXTEND1(B, S, sunpklo, H)
1982 VECTOR_CAST_EXTEND2(B, I, sunpklo, H, S)
1983 VECTOR_CAST_EXTEND3(B, L, sunpklo, H, S, D)
1984 VECTOR_CAST_I2F_EXTEND2(B, F, sunpklo, H, S, scvtf)
1985 VECTOR_CAST_I2F_EXTEND3(B, D, sunpklo, H, S, D, scvtf)
1986 dnl
1987 VECTOR_CAST_NARROW1(S, B, dup, B, uzp1)
1988 VECTOR_CAST_EXTEND1(S, I, sunpklo, S)
1989 VECTOR_CAST_EXTEND2(S, L, sunpklo, S, D)
1990 VECTOR_CAST_X2F_EXTEND1(S, F, sunpklo, S, scvtf, S)
1991 VECTOR_CAST_I2F_EXTEND2(S, D, sunpklo, S, D, scvtf)
1992 dnl
1993 VECTOR_CAST_NARROW2(I, B, dup, H, uzp1, B)
1994 VECTOR_CAST_NARROW1(I, S, dup, H, uzp1)
1995 VECTOR_CAST_EXTEND1(I, L, sunpklo, D)
1996 VECTOR_CAST_X2X(I, F, scvtf, S)
1997 VECTOR_CAST_X2F_EXTEND1(I, D, sunpklo, D, scvtf, D)
1998 dnl
1999 VECTOR_CAST_NARROW3(L, B, dup, S, uzp1, H, B)
2000 VECTOR_CAST_NARROW2(L, S, dup, S, uzp1, H)
2001 VECTOR_CAST_NARROW1(L, I, dup, S, uzp1)
2002 VECTOR_CAST_X2F_NARROW1(L, F, scvtf, S, D, dup, S, uzp1)
2003 VECTOR_CAST_X2X(L, D, scvtf, D)
2004 dnl
2005 VECTOR_CAST_F2X_NARROW2(F, B, fcvtzs, S, dup, H, uzp1, B)
2006 VECTOR_CAST_F2X_NARROW1(F, S, fcvtzs, S, dup, H, uzp1)
2007 VECTOR_CAST_X2X(F, I, fcvtzs, S)
2008 VECTOR_CAST_F2X_EXTEND1(F, L, fcvtzs, S, sunpklo, D)
2009 VECTOR_CAST_X2F_EXTEND1(F, D, sunpklo, D, fcvt, S)
2010 dnl
2011 VECTOR_CAST_F2X_NARROW3(D, B, fcvtzs, D, dup, S, uzp1, H, B)
2012 VECTOR_CAST_F2X_NARROW2(D, S, fcvtzs, D, dup, S, uzp1, H)
2013 VECTOR_CAST_F2X_NARROW1(D, I, fcvtzs, D, dup, S, uzp1)
2014 VECTOR_CAST_X2X(D, L, fcvtzs, D)
2015 VECTOR_CAST_X2F_NARROW1(D, F, fcvt, S, D, dup, S, uzp1)
2016 dnl
2017 dnl
2018 // ------------------------------ Vector extract ---------------------------------
2019 define(`VECTOR_EXTRACT_SXT', `
2020 instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
2021 %{
2022   predicate(UseSVE > 0);
2023   match(Set dst (Extract$1 src idx));
2024   effect(TEMP pTmp, KILL cr);
2025   ins_cost(2 * SVE_COST);
2026   format %{ "sve_extract $dst, $3, $pTmp, $src, $idx\n\t"
2027             "sbfmw $dst, $dst, 0U, $5\t# extract from vector($1)" %}
2028   ins_encode %{
2029     __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg),
2030                    as_FloatRegister($src$$reg), (int)($idx$$constant));
2031     __ sbfmw(as_$4($dst$$reg), as_$4($dst$$reg), 0U, $5);
2032   %}
2033   ins_pipe(pipe_slow);
2034 %}')dnl
2035 dnl                $1 $2         $3 $4        $5
2036 VECTOR_EXTRACT_SXT(B, iRegINoSp, B, Register, 7U)
2037 VECTOR_EXTRACT_SXT(S, iRegINoSp, H, Register, 15U)
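// For sub-int element types the extracted lane is sign-extended to 32 bits with
// sbfmw (imms = 7 for byte, 15 for short); the plain VECTOR_EXTRACT variants below
// need no such fix-up.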
2038 
2039 dnl
2040 define(`VECTOR_EXTRACT', `
2041 instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
2042 %{
2043   predicate(UseSVE > 0);
2044   match(Set dst (Extract$1 src idx));
2045   effect(TEMP pTmp, KILL cr);
2046   ins_cost(2 * SVE_COST);
2047   format %{ "sve_extract $dst, $3, $pTmp, $src, $idx\t# extract from vector($1)" %}
2048   ins_encode %{
2049     __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg),
2050                    as_FloatRegister($src$$reg), (int)($idx$$constant));
2051   %}
2052   ins_pipe(pipe_slow);
2053 %}')dnl
2054 dnl            $1 $2         $3 $4
2055 VECTOR_EXTRACT(I, iRegINoSp, S, Register)
2056 VECTOR_EXTRACT(L, iRegLNoSp, D, Register)
2057 VECTOR_EXTRACT(F, vRegF,     S, FloatRegister)
2058 VECTOR_EXTRACT(D, vRegD,     D, FloatRegister)
2059 
2060 // ------------------------------- VectorTest ----------------------------------
2061 dnl
2062 dnl VTEST($1,      $2,   $3,  $4  )
2063 dnl VTEST(op_name, pred, imm, cond)
2064 define(`VTEST', `
2065 instruct vtest_$1`'(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr)
2066 %{
2067   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
2068             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::$2);
2069   match(Set dst (VectorTest src1 src2));
2070   effect(TEMP pTmp, KILL cr);
2071   ins_cost(SVE_COST);
2072   format %{ "sve_cmpeq $pTmp, $src1, $3\n\t"
2073             "csetw $dst, $4\t# VectorTest (sve) - $1" %}
2074   ins_encode %{
2075     // "src2" is not used for sve.
2076     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
2077     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
2078     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
2079                ptrue, as_FloatRegister($src1$$reg), $3);
2080     __ csetw(as_Register($dst$$reg), Assembler::$4);
2081   %}
2082   ins_pipe(pipe_slow);
2083 %}')dnl
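dnl alltrue compares every lane against 0 and sets the result when no lane matched
dnl (csetw EQ, the SVE "none" condition); anytrue compares against -1 and sets the
dnl result when at least one lane matched (csetw NE).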
2084 dnl
2085 VTEST(alltrue, overflow, 0, EQ)
2086 VTEST(anytrue, ne,      -1, NE)
2087 dnl
2088 dnl
2089 dnl VTEST_PARTIAL($1,      $2,   $3,  $4  )
2090 dnl VTEST_PARTIAL(op_name, pred, imm, cond)
2091 define(`VTEST_PARTIAL', `
2092 instruct vtest_$1_partial`'(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr)
2093 %{
2094   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
2095             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::$2);
2096   match(Set dst (VectorTest src1 src2));
2097   effect(TEMP pTmp, KILL cr);
2098   ins_cost(SVE_COST);
2099   format %{ "vtest_$1_partial $dst, $src1, $src2\t# VectorTest partial (sve) - $1" %}
2100   ins_encode %{
2101     // "src2" is not used for sve.
2102     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
2103     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
2104     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size,
2105                           Matcher::vector_length(this, $src1));
2106     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
2107                as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), $3);
2108     __ csetw(as_Register($dst$$reg), Assembler::$4);
2109   %}
2110   ins_pipe(pipe_slow);
2111 %}')dnl
2112 dnl
2113 VTEST_PARTIAL(alltrue, overflow, 0, EQ)
2114 VTEST_PARTIAL(anytrue, ne,      -1, NE)
2115 
2116 // ------------------------------ Vector insert ---------------------------------
2117 
2118 instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pTmp, rFlagsReg cr)
2119 %{
2120   predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
2121             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
2122              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
2123              n->bottom_type()->is_vect()->element_basic_type() == T_INT));
2124   match(Set dst (VectorInsert (Binary src val) idx));
2125   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
2126   ins_cost(4 * SVE_COST);
2127   format %{ "sve_index $dst, -16, 1\t# (B/S/I)\n\t"
2128             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
2129             "sve_orr $dst, $src, $src\n\t"
2130             "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
2131   ins_encode %{
2132     BasicType bt = Matcher::vector_element_basic_type(this, $src);
2133     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
2134     __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1);
2135     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
2136                as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
2137     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2138     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
2139   %}
2140   ins_pipe(pipe_slow);
2141 %}
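// The _small insert variants rely on SVE's signed 5-bit compare-immediate range:
// sve_index fills the destination with -16, -15, ... and the lane index is biased by
// -16, mapping lane numbers in [0, 31] onto [-16, 15]. Vectors with more than 32
// lanes use the register-compare form below (sve_index from 0 plus sve_dup of the
// index) instead.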
2142 
2143 instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pTmp, rFlagsReg cr)
2144 %{
2145   predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
2146             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
2147   match(Set dst (VectorInsert (Binary src val) idx));
2148   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
2149   ins_cost(4 * SVE_COST);
2150   format %{ "sve_index $dst, S, -16, 1\n\t"
2151             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
2152             "sve_orr $dst, $src, $src\n\t"
2153             "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
2154   ins_encode %{
2155     __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1);
2156     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
2157                as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
2158     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2159     __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
2160   %}
2161   ins_pipe(pipe_slow);
2162 %}
2163 
2164 instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
2165 %{
2166   predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
2167             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
2168              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
2169              n->bottom_type()->is_vect()->element_basic_type() == T_INT));
2170   match(Set dst (VectorInsert (Binary src val) idx));
2171   effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
2172   ins_cost(5 * SVE_COST);
2173   format %{ "sve_index $tmp1, 0, 1\t# (B/S/I)\n\t"
2174             "sve_dup $dst, $idx\t# (B/S/I)\n\t"
2175             "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
2176             "sve_orr $dst, $src, $src\n\t"
2177             "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
2178   ins_encode %{
2179     BasicType bt = Matcher::vector_element_basic_type(this, $src);
2180     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
2181     __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1);
2182     __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant));
2183     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
2184                as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
2185     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2186     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
2187   %}
2188   ins_pipe(pipe_slow);
2189 %}
2190 dnl
2191 dnl
2192 define(`VECTOR_INSERT_D', `
2193 instruct insert$1`'(vReg dst, vReg src, $2 val, immI idx, pRegGov pTmp, rFlagsReg cr)
2194 %{
2195   predicate(UseSVE > 0 &&
2196             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($1));
2197   match(Set dst (VectorInsert (Binary src val) idx));
2198   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
2199   ins_cost(4 * SVE_COST);
2200   format %{ "sve_index $dst, $3, -16, 1\n\t"
2201             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
2202             "sve_orr $dst, $src, $src\n\t"
2203             "sve_cpy $dst, $pTmp, $val\t# insert into vector ($1)" %}
2204   ins_encode %{
2205     __ sve_index(as_FloatRegister($dst$$reg), __ $3, -16, 1);
2206     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ $3, ptrue,
2207                as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
2208     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2209     __ sve_cpy(as_FloatRegister($dst$$reg), __ $3, as_PRegister($pTmp$$reg), as_$4($val$$reg));
2210   %}
2211   ins_pipe(pipe_slow);
2212 %}')dnl
2213 dnl             $1 $2     $3 $4
2214 VECTOR_INSERT_D(L, iRegL, D, Register)
2215 VECTOR_INSERT_D(D, vRegD, D, FloatRegister)
2216 
2217 instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
2218 %{
2219   predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
2220             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
2221   match(Set dst (VectorInsert (Binary src val) idx));
2222   effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
2223   ins_cost(5 * SVE_COST);
2224   format %{ "sve_index $tmp1, S, 0, 1\n\t"
2225             "sve_dup $dst, S, $idx\n\t"
2226             "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
2227             "sve_orr $dst, $src, $src\n\t"
2228             "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
2229   ins_encode %{
2230     __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1);
2231     __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant));
2232     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
2233                as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
2234     __ sve_orr(as_FloatRegister($dst$$reg),
2235                as_FloatRegister($src$$reg),
2236                as_FloatRegister($src$$reg));
2237     __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
2238                as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
2239   %}
2240   ins_pipe(pipe_slow);
2241 %}
2242 
2243 // ------------------------------ Vector shuffle -------------------------------
2244 
2245 instruct loadshuffleB(vReg dst, vReg src)
2246 %{
2247   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2248   match(Set dst (VectorLoadShuffle src));
2249   ins_cost(SVE_COST);
2250   format %{ "sve_orr $dst, $src, $src\t# vector load shuffle (B)" %}
2251   ins_encode %{
2252     if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
2253       __ sve_orr(as_FloatRegister($dst$$reg),
2254                  as_FloatRegister($src$$reg),
2255                  as_FloatRegister($src$$reg));
2256     }
2257   %}
2258   ins_pipe(pipe_slow);
2259 %}
2260 
2261 instruct loadshuffleS(vReg dst, vReg src)
2262 %{
2263   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2264   match(Set dst (VectorLoadShuffle src));
2265   ins_cost(SVE_COST);
2266   format %{ "sve_uunpklo $dst, $src\t# vector load shuffle (B to H)" %}
2267   ins_encode %{
2268     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2269   %}
2270   ins_pipe(pipe_slow);
2271 %}
2272 
2273 instruct loadshuffleI(vReg dst, vReg src)
2274 %{
2275   predicate(UseSVE > 0 &&
2276            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2277             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2278   match(Set dst (VectorLoadShuffle src));
2279   ins_cost(2 * SVE_COST);
2280   format %{ "sve_uunpklo $dst, H, $src\n\t"
2281             "sve_uunpklo $dst, S, $dst\t# vector load shuffle (B to S)" %}
2282   ins_encode %{
2283     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2284     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
2285   %}
2286   ins_pipe(pipe_slow);
2287 %}
2288 
2289 instruct loadshuffleL(vReg dst, vReg src)
2290 %{
2291   predicate(UseSVE > 0 &&
2292            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
2293             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
2294   match(Set dst (VectorLoadShuffle src));
2295   ins_cost(3 * SVE_COST);
2296   format %{ "sve_uunpklo $dst, H, $src\n\t"
2297             "sve_uunpklo $dst, S, $dst\n\t"
2298             "sve_uunpklo $dst, D, $dst\t# vector load shuffle (B to D)" %}
2299   ins_encode %{
2300     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2301     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
2302     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
2303   %}
2304   ins_pipe(pipe_slow);
2305 %}
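// VectorLoadShuffle receives byte-sized shuffle indices; for wider element types the
// indices are zero-extended in place with one uunpklo per widening step so that the
// following sve_tbl (vector rearrange) can consume them at the element size.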
2306 
2307 // ------------------------------ Vector rearrange -------------------------------
2308 
2309 instruct rearrange(vReg dst, vReg src, vReg shuffle)
2310 %{
2311   predicate(UseSVE > 0);
2312   match(Set dst (VectorRearrange src shuffle));
2313   ins_cost(SVE_COST);
2314   format %{ "sve_tbl $dst, $src, $shuffle\t# vector rearrange" %}
2315   ins_encode %{
2316     BasicType bt = Matcher::vector_element_basic_type(this, $src);
2317     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
2318     __ sve_tbl(as_FloatRegister($dst$$reg), size,
2319                as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
2320   %}
2321   ins_pipe(pipe_slow);
2322 %}
2323 
2324 // ------------------------------ Vector Load Gather ---------------------------------
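// Gathers use sve_ld1w_gather/sve_ld1d_gather with a vector of int indices; for the
// 64-bit element types the indices are first widened with uunpklo. The partial
// variants additionally build a whilelo predicate so that only the active lanes are
// loaded. The store scatter rules further below mirror the same pattern.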
2325 
2326 instruct gatherI(vReg dst, indirect mem, vReg idx) %{
2327   predicate(UseSVE > 0 &&
2328             n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
2329             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2330              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2331   match(Set dst (LoadVectorGather mem idx));
2332   ins_cost(SVE_COST);
2333   format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %}
2334   ins_encode %{
2335     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue,
2336                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
2337   %}
2338   ins_pipe(pipe_slow);
2339 %}
2340 
2341 instruct gatherL(vReg dst, indirect mem, vReg idx) %{
2342   predicate(UseSVE > 0 &&
2343             n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
2344             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
2345              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
2346   match(Set dst (LoadVectorGather mem idx));
2347   ins_cost(2 * SVE_COST);
2348   format %{ "sve_uunpklo $idx, $idx\n\t"
2349             "load_vector_gather $dst, $mem, $idx\t# vector load gather (L/D)" %}
2350   ins_encode %{
2351     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
2352     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg));
2353   %}
2354   ins_pipe(pipe_slow);
2355 %}
2356 
2357 // ------------------------------ Vector Load Gather Partial -------------------------------
2358 
2359 instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
2360   predicate(UseSVE > 0 &&
2361             n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
2362             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2363              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2364   match(Set dst (LoadVectorGather mem idx));
2365   effect(TEMP pTmp, KILL cr);
2366   ins_cost(2 * SVE_COST + INSN_COST);
2367   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
2368             "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %}
2369   ins_encode %{
2370     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S,
2371                           Matcher::vector_length(this));
2372     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg),
2373                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
2374   %}
2375   ins_pipe(pipe_slow);
2376 %}
2377 
2378 instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
2379   predicate(UseSVE > 0 &&
2380             n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
2381             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
2382              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
2383   match(Set dst (LoadVectorGather mem idx));
2384   effect(TEMP pTmp, KILL cr);
2385   ins_cost(3 * SVE_COST + INSN_COST);
2386   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
2387             "sve_uunpklo $idx, $idx\n\t"
2388             "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (L/D)" %}
2389   ins_encode %{
2390     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D,
2391                           Matcher::vector_length(this));
2392     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
2393     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg),
2394                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
2395   %}
2396   ins_pipe(pipe_slow);
2397 %}
2398 
2399 // ------------------------------ Vector Store Scatter -------------------------------
2400 
2401 instruct scatterI(indirect mem, vReg src, vReg idx) %{
2402   predicate(UseSVE > 0 &&
2403             n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
2404             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2405              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2406   match(Set mem (StoreVectorScatter mem (Binary src idx)));
2407   ins_cost(SVE_COST);
2408   format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %}
2409   ins_encode %{
2410     __ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue,
2411                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
2412   %}
2413   ins_pipe(pipe_slow);
2414 %}
2415 
2416 instruct scatterL(indirect mem, vReg src, vReg idx) %{
2417   predicate(UseSVE > 0 &&
2418             n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
2419             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
2420              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
2421   match(Set mem (StoreVectorScatter mem (Binary src idx)));
2422   ins_cost(2 * SVE_COST);
2423   format %{ "sve_uunpklo $idx, $idx\n\t"
2424             "store_vector_scatter $mem, $idx, $src\t# vector store scatter (L/D)" %}
2425   ins_encode %{
2426     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D,
2427                    as_FloatRegister($idx$$reg));
2428     __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue,
2429                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
2430   %}
2431   ins_pipe(pipe_slow);
2432 %}
2433 
2434 // ------------------------------ Vector Store Scatter Partial -------------------------------
2435 
2436 instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
2437   predicate(UseSVE > 0 &&
2438             n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
2439             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2440              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2441   match(Set mem (StoreVectorScatter mem (Binary src idx)));
2442   effect(TEMP pTmp, KILL cr);
2443   ins_cost(2 * SVE_COST + INSN_COST);
2444   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
2445             "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %}
2446   ins_encode %{
2447     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S,
2448                           Matcher::vector_length(this, $src));
2449     __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
2450                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
2451   %}
2452   ins_pipe(pipe_slow);
2453 %}
2454 
2455 instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
2456   predicate(UseSVE > 0 &&
2457             n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
2458             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
2459              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
2460   match(Set mem (StoreVectorScatter mem (Binary src idx)));
2461   effect(TEMP pTmp, KILL cr);
2462   ins_cost(3 * SVE_COST + INSN_COST);
2463   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
2464             "sve_uunpklo $idx, $idx\n\t"
2465             "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (L/D)" %}
2466   ins_encode %{
2467     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D,
2468                           Matcher::vector_length(this, $src));
2469     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
2470     __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
2471                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
2472   %}
2473   ins_pipe(pipe_slow);
2474 %}
2475 
2476 
2477 // ------------------------------ Vector Load Const -------------------------------
2478 
2479 instruct loadconB(vReg dst, immI0 src) %{
2480   predicate(UseSVE > 0 &&
2481             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2482   match(Set dst (VectorLoadConst src));
2483   ins_cost(SVE_COST);
2484   format %{ "sve_index $dst, 0, 1\t# generate iota indices" %}
2485   ins_encode %{
2486     __ sve_index(as_FloatRegister($dst$$reg), __ B, 0, 1);
2487   %}
2488   ins_pipe(pipe_slow);
2489 %}
2490 
2491 // Intrinsics for String.indexOf(char)
2492 
2493 dnl
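dnl STRING_INDEXOF_CHAR($1,       $2,       $3 )
dnl STRING_INDEXOF_CHAR(enc_abbr, enc_name, isL)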
2494 define(`STRING_INDEXOF_CHAR', `
2495 instruct string$1_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
2496                                   iRegI_R0 result, vReg ztmp1, vReg ztmp2,
2497                                   pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
2498 %{
2499   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
2500   predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::$1));
2501   effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
2502 
2503   format %{ "String$2 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
2504 
2505   ins_encode %{
2506     __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
2507                                as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
2508                                as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), $3 /* isL */);
2509   %}
2510   ins_pipe(pipe_class_memory);
2511 %}')dnl
2512 dnl                 $1 $2      $3
2513 STRING_INDEXOF_CHAR(L, Latin1, true)
2514 STRING_INDEXOF_CHAR(U, UTF16,  false)
2515 
2516 dnl
2517 dnl VMASK_REDUCTION($1,     $2,      $3  )
2518 dnl VMASK_REDUCTION(suffix, op_name, cost)
2519 define(`VMASK_REDUCTION', `
2520 instruct vmask_$1(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
2521   predicate(UseSVE > 0 &&
2522             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
2523   match(Set dst ($2 src));
2524   effect(TEMP ptmp, KILL cr);
2525   ins_cost($3 * SVE_COST);
2526   format %{ "vmask_$1 $dst, $src\t# vector mask $1 (sve)" %}
2527   ins_encode %{
2528     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
2529                            as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
2530   %}
2531   ins_pipe(pipe_slow);
2532 %}')dnl
2533 dnl
2534 // ---------------------------- Vector mask reductions ---------------------------
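// These reduce a full-size vector mask (held as one boolean byte per lane) to
// a scalar result via C2_MacroAssembler::sve_vmask_reduction: the count of set
// lanes (truecount), the index of the first set lane (firsttrue), or the index
// of the last set lane (lasttrue).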
2535 VMASK_REDUCTION(truecount, VectorMaskTrueCount, 2)
2536 VMASK_REDUCTION(firsttrue, VectorMaskFirstTrue, 3)
2537 VMASK_REDUCTION(lasttrue,  VectorMaskLastTrue, 4)
2538 dnl
2539 dnl VMASK_REDUCTION_PARTIAL($1,     $2,      $3  )
2540 dnl VMASK_REDUCTION_PARTIAL(suffix, op_name, cost)
2541 define(`VMASK_REDUCTION_PARTIAL', `
2542 instruct vmask_$1_partial(iRegINoSp dst, vReg src, pRegGov ifelse($1, `firsttrue', `pgtmp, pReg ptmp', `ptmp'), rFlagsReg cr) %{
2543   predicate(UseSVE > 0 &&
2544             n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
2545   match(Set dst ($2 src));
2546   effect(TEMP ifelse($1, `firsttrue', `pgtmp, TEMP ptmp', `ptmp'), KILL cr);
2547   ins_cost($3 * SVE_COST);
2548   format %{ "vmask_$1 $dst, $src\t# vector mask $1 partial (sve)" %}
2549   ins_encode %{
2550     __ sve_whilelo_zr_imm(as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), __ B,
2551                           Matcher::vector_length(this, $src));
2552     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
2553                            as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), as_PRegister($ptmp$$reg));
2554   %}
2555   ins_pipe(pipe_slow);
2556 %}')dnl
2557 dnl
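// Partial variants cover masks whose payload is shorter than the SVE register:
// sve_whilelo_zr_imm first builds a governing predicate over the first
// vector_length lanes, and the reduction is evaluated under that predicate.
// The firsttrue case keeps the generated predicate in pgtmp and uses ptmp as
// an extra scratch predicate.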
2558 VMASK_REDUCTION_PARTIAL(truecount, VectorMaskTrueCount, 3)
2559 VMASK_REDUCTION_PARTIAL(firsttrue, VectorMaskFirstTrue, 4)
2560 VMASK_REDUCTION_PARTIAL(lasttrue,  VectorMaskLastTrue, 5)
2561 
2562 dnl
2563 dnl VSTOREMASK_REDUCTION($1,     $2,      $3  )
2564 dnl VSTOREMASK_REDUCTION(suffix, op_name, cost)
2565 define(`VSTOREMASK_REDUCTION', `
2566 instruct vstoremask_$1(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
2567   predicate(UseSVE > 0 &&
2568             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
2569   match(Set dst ($2 (VectorStoreMask src esize)));
2570   effect(TEMP ptmp, KILL cr);
2571   ins_cost($3 * SVE_COST);
2572   format %{ "vstoremask_$1 $dst, $src\t# vector mask $1 (sve)" %}
2573   ins_encode %{
2574     unsigned size = $esize$$constant;
2575     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
2576     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
2577     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
2578                            ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src));
2579   %}
2580   ins_pipe(pipe_slow);
2581 %}')dnl
2582 dnl
2583 // ----------------- Vector mask reductions combined with VectorStoreMask ---------------
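// These patterns fuse the mask reduction with the preceding VectorStoreMask,
// so the reduction runs directly on the incoming mask using the register
// variant derived from esize, rather than on a separately materialized
// boolean vector.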
2584 VSTOREMASK_REDUCTION(truecount, VectorMaskTrueCount, 2)
2585 VSTOREMASK_REDUCTION(firsttrue, VectorMaskFirstTrue, 3)
2586 VSTOREMASK_REDUCTION(lasttrue,  VectorMaskLastTrue, 4)
2587 dnl
2588 dnl VSTOREMASK_REDUCTION_PARTIAL($1,     $2,      $3  )
2589 dnl VSTOREMASK_REDUCTION_PARTIAL(suffix, op_name, cost)
2590 define(`VSTOREMASK_REDUCTION_PARTIAL', `
2591 instruct vstoremask_$1_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ifelse($1, `firsttrue', `pgtmp, pReg ptmp', `ptmp'), rFlagsReg cr) %{
2592   predicate(UseSVE > 0 &&
2593             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
2594   match(Set dst ($2 (VectorStoreMask src esize)));
2595   effect(TEMP ifelse($1, `firsttrue', `pgtmp, TEMP ptmp', `ptmp'), KILL cr);
2596   ins_cost($3 * SVE_COST);
2597   format %{ "vstoremask_$1 $dst, $src\t# vector mask $1 partial (sve)" %}
2598   ins_encode %{
2599     unsigned size = $esize$$constant;
2600     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
2601     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
2602     __ sve_whilelo_zr_imm(as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), variant,
2603                           Matcher::vector_length(this, $src));
2604     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
2605                            as_PRegister(ifelse($1, `firsttrue', `$pgtmp', `$ptmp')$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size);
2606   %}
2607   ins_pipe(pipe_slow);
2608 %}')dnl
2609 dnl
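// Partial variants again bound the reduction with a governing predicate from
// sve_whilelo_zr_imm, built with the esize-derived register variant over
// vector_length lanes.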
2610 VSTOREMASK_REDUCTION_PARTIAL(truecount, VectorMaskTrueCount, 3)
2611 VSTOREMASK_REDUCTION_PARTIAL(firsttrue, VectorMaskFirstTrue, 4)
2612 VSTOREMASK_REDUCTION_PARTIAL(lasttrue,  VectorMaskLastTrue, 5)