1 //
   2 // Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 dnl Generate the warning
  27 // This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
  28 dnl
  29 
  30 // AArch64 SVE Architecture Description File
  31 
  32 dnl
     dnl TYPE2DATATYPE(type_abbr)
     dnl Maps a one-letter element type abbreviation to its upper-case type name:
     dnl B -> BYTE, S -> SHORT, I -> INT, L -> LONG, F -> FLOAT, D -> DOUBLE.
     dnl Any other argument reaches the last ifelse branch and expands to
     dnl error(<argument>).
  33 define(`TYPE2DATATYPE',
  34 `ifelse($1, `B', `BYTE',
  35         $1, `S', `SHORT',
  36         $1, `I', `INT',
  37         $1, `L', `LONG',
  38         $1, `F', `FLOAT',
  39         $1, `D', `DOUBLE',
  40         `error($1)')')dnl
  41 dnl
  42 dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1,            $2,       $3,      $4   )
  43 dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len, scale)
  44 define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
     // Constant operand for the immediate offset of a vmemA address. A Con$1 node
     // is accepted only when Address::offset_ok_for_sve_immed approves its value
     // as a $3-bit immediate for the register size computed in the predicate.
  45 operand vmemA_imm$1Offset$3()
  46 %{
  47   // (esize / msize) = $4
  48   predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
  49             Matcher::scalable_vector_reg_size(T_BYTE)ifelse($4, `1', `', ` / $4')));
  50   match(Con$1);
  51 
  52   op_cost(0);
  53   format %{ %}
  54   interface(CONST_INTER);
  55 %}')dnl
  56 
  57 // 4 bit signed offset -- for predicated load/store
  58 OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int,  4, 1)
  59 OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4, 1)
  60 dnl
  61 dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1,            $2     )
  62 dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
  63 define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
     // Memory operand of the form [reg, off]: a pointer register plus a constant
     // accepted by the vmemA_imm$1Offset$2 operand. The index field is set to
     // 0xffffffff and the scale to 0.
  64 operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
  65 %{
  66   constraint(ALLOC_IN_RC(ptr_reg));
  67   match(AddP reg off);
  68   op_cost(0);
  69   format %{ "[$reg, $off]" %}
  70   interface(MEMORY_INTER) %{
  71     base($reg);
  72     `index'(0xffffffff);
  73     scale(0x0);
  74     disp($off);
  75   %}
  76 %}')dnl
  77 OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
  78 OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
  79 
  80 // The indOff forms of vmemA are valid only when the vector element (load to/store from)
  81 // size is equal to the memory element (load from/store to) size.
  82 opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
  83 
  84 source_hpp %{
       // Declarations of the support queries implemented in the source block of this
       // file. Both report whether an opcode is handled for a vector of vlen elements
       // of basic type bt; the second one answers for the masked form of the operation.
  85   bool op_sve_supported(int opcode, int vlen, BasicType bt);
  86   bool masked_op_sve_supported(int opcode, int vlen, BasicType bt);
  87 %}
  88 
  89 source %{
  90 
       // Pointer to a C2_MacroAssembler member function taking (Rt, T, Pg, adr). It is
       // used by loadStoreA_predicated to hold the selected SVE load/store emitter.
  91   typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
  92                                                              PRegister Pg, const Address &adr);
  93 
  94   // Emit one SVE load or store governed by predicate pg (callers may pass ptrue).
  95   static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg,
  96                                     PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt,
  97                                     int opcode, Register base, int index, int size, int disp) {
  98     const int mem_elem_bytes = type2aelembytes(mem_elem_bt);
  99     if (index != -1) {
 100       // Only the base + immediate form is implemented; an index register is rejected.
 101       assert(false, "unimplemented");
 102       ShouldNotReachHere();
 103       return;
 104     }
 105     assert(size == 0, "unsupported address mode: scale size = %d", size);
 106     // Note: the opcode parameter is not consulted below.
 107     // Select the ld1/st1 member function from the memory element width in bytes.
 108     sve_mem_insn_predicate emit;
 109     switch (mem_elem_bytes) {
 110       case 1: emit = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b; break;
 111       case 2: emit = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h; break;
 112       case 4: emit = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w; break;
 113       case 8: emit = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d; break;
 114       default:
 115         assert(false, "unsupported");
 116         ShouldNotReachHere();
 117     }
 118 
 119     // The byte displacement is divided by the memory element size and then by
 120     // Matcher::scalable_vector_reg_size(vector_elem_bt) to form the immediate
 121     // handed to Address(base, imm4).
 122     const int imm4 = disp / mem_elem_bytes / Matcher::scalable_vector_reg_size(vector_elem_bt);
 123     const Assembler::SIMD_RegVariant variant = Assembler::elemType_to_regVariant(vector_elem_bt);
 124     (masm.*emit)(reg, variant, pg, Address(base, imm4));
 125   }
 126 
 127   bool op_sve_supported(int opcode, int vlen, BasicType bt) {
 128     // Reports whether opcode is handled for a vector of vlen elements of type bt.
 129     const int vec_bytes = vlen * type2aelembytes(bt);
 130     // Never supported: Op_MulAddVS2VI, the multiply reductions (no such
 131     // instructions) and the ExtractC/ExtractUB nodes.
 132     if (opcode == Op_MulAddVS2VI    || opcode == Op_MulReductionVD ||
 133         opcode == Op_MulReductionVF || opcode == Op_MulReductionVI ||
 134         opcode == Op_MulReductionVL || opcode == Op_ExtractC       ||
 135         opcode == Op_ExtractUB) {
 136       return false;
 137     }
 138     // Vector API specific: at least 4 elements, bounded above by MaxVectorSize only.
 139     if (opcode == Op_VectorLoadShuffle || opcode == Op_VectorRearrange) {
 140       return vlen >= 4 && vec_bytes <= MaxVectorSize;
 141     }
 142     // Plain vector loads and stores defer to the generic matcher size check.
 143     if (opcode == Op_LoadVector || opcode == Op_StoreVector) {
 144       return Matcher::vector_size_supported(bt, vlen);
 145     }
 146     // ExpandV needs UseSVE >= 2 and a non-subword element type; when that holds
 147     // it is still subject to the default size check at the end.
 148     if (opcode == Op_ExpandV && (UseSVE < 2 || is_subword_type(bt))) {
 149       return false;
 150     }
 151 
 152     // By default, we only support vector operations with no less than 8 bytes and 2 elements.
 153     return vlen >= 2 && 8 <= vec_bytes && vec_bytes <= MaxVectorSize;
 154   }
 155 
 156   bool masked_op_sve_supported(int opcode, int vlen, BasicType bt) {
 157     // The masked answer mirrors the unmasked query for every opcode except
 158     // Op_VectorRearrange, which is always reported as unsupported here.
 159     const bool is_rearrange = (opcode == Op_VectorRearrange);
 160     return !is_rearrange && op_sve_supported(opcode, vlen, bt);
 161   }
 162 %}
 163 
 164 definitions %{
       // Cost unit referenced by the ins_cost clauses of the SVE rules in this file.
 165   int_def SVE_COST             (200, 200);
 166 %}
 167 
 168 dnl
 169 dnl ELEMENT_SHORT_CHAR($1,    $2  )
 170 dnl ELEMENT_SHORT_CHAR(etype, node)
     dnl Expands to a C++ condition that compares the element basic type of the
     dnl vector type of node with etype. When etype is T_SHORT the condition also
     dnl accepts T_CHAR.
 171 define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
 172   `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
 173             ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
 174    `($2->bottom_type()->is_vect()->element_basic_type() == $1)')')dnl
 175 dnl
 176 
 177 // All SVE instructions
 178 
 179 // vector load/store
 180 
 181 // Unpredicated vector load/store
     // loadV handles a LoadVector whose memory size is at least 16 bytes and equals
     // MaxVectorSize. It is emitted through loadStoreA_predicated with ptrue and with
     // the same basic type for the memory and the vector elements.
 182 instruct loadV(vReg dst, vmemA mem) %{
 183   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16 &&
 184             n->as_LoadVector()->memory_size() == MaxVectorSize);
 185   match(Set dst (LoadVector mem));
 186   ins_cost(4 * SVE_COST);
 187   format %{ "sve_ldr $dst, $mem\t# vector (sve)" %}
 188   ins_encode %{
 189     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 190     BasicType bt = Matcher::vector_element_basic_type(this);
 191     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
 192                           bt, bt, $mem->opcode(),
 193                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 194   %}
 195   ins_pipe(pipe_slow);
 196 %}
 197 
 198 instruct storeV(vReg src, vmemA mem) %{
       // Store counterpart of loadV: a StoreVector whose memory size is at least 16
       // bytes and equals MaxVectorSize, emitted through loadStoreA_predicated with
       // ptrue. The element type is taken from the src operand.
 199   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16 &&
 200             n->as_StoreVector()->memory_size() == MaxVectorSize);
 201   match(Set mem (StoreVector mem src));
 202   ins_cost(4 * SVE_COST);
 203   format %{ "sve_str $mem, $src\t# vector (sve)" %}
 204   ins_encode %{
 205     FloatRegister src_reg = as_FloatRegister($src$$reg);
 206     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 207     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
 208                           bt, bt, $mem->opcode(),
 209                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 210   %}
 211   ins_pipe(pipe_slow);
 212 %}dnl
 213 
 214 dnl
 215 define(`VLoadStore', `
 216 // ifelse(load, $3, Load, Store) Vector ($6 bits)
     // Selected when the memory size of the node is exactly $4 bytes. The cost is
     // expressed in INSN_COST units and the access goes through an ldr/str encoding
     // class instead of loadStoreA_predicated.
 217 instruct $3V$4_vreg`'(vReg $7, vmem$4 mem)
 218 %{
 219   predicate(UseSVE > 0 && `n->as_'ifelse(load, $3, Load, Store)Vector()->memory_size() == $4);
 220   match(Set ifelse(load, $3, dst (LoadVector mem), mem (StoreVector mem src)));
 221   ins_cost(4 * INSN_COST);
 222   format %{ "$1   ifelse(load, $3, `$dst,$mem', `$mem,$src')\t# vector ($6 bits)" %}
 223   ins_encode( `aarch64_enc_'ifelse(load, $3, ldr, str)v$2($7, mem) );
 224   ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64));
 225 %}')dnl
 226 dnl        $1    $2 $3     $4  $5 $6   $7
 227 VLoadStore(ldrh, H, load,  2,  D, 16,  dst)
 228 VLoadStore(strh, H, store, 2,  D, 16,  src)
 229 VLoadStore(ldrs, S, load,  4,  D, 32,  dst)
 230 VLoadStore(strs, S, store, 4,  D, 32,  src)
 231 VLoadStore(ldrd, D, load,  8,  D, 64,  dst)
 232 VLoadStore(strd, D, store, 8,  D, 64,  src)
 233 VLoadStore(ldrq, Q, load, 16,  X, 128, dst)
 234 VLoadStore(strq, Q, store, 16, X, 128, src)
 235 
 236 // Predicated vector load/store, based on the vector length of the node.
 237 // Only load/store values in the range of the memory_size. This is needed
 238 // when the memory_size is lower than the hardware supported max vector size.
 239 // And this might happen for Vector API mask vector load/store.
     // loadV_partial applies when the memory size is above 16 bytes and below
     // MaxVectorSize. pgtmp is set by sve_ptrue_lanecnt for the lane count of the
     // node and then used as the predicate of the load; cr is declared as killed.
 240 instruct loadV_partial(vReg dst, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{
 241   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 &&
 242             n->as_LoadVector()->memory_size() < MaxVectorSize);
 243   match(Set dst (LoadVector mem));
 244   effect(TEMP pgtmp, KILL cr);
 245   ins_cost(6 * SVE_COST);
 246   format %{ "sve_ptrue $pgtmp, vector_length\n\t"
 247             "sve_ldr $dst, $pgtmp, $mem\t# load vector partial" %}
 248   ins_encode %{
 249     BasicType bt = Matcher::vector_element_basic_type(this);
 250     __ sve_ptrue_lanecnt(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),
 251                          Matcher::vector_length(this));
 252     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 253     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg,
 254                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
 255                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 256   %}
 257   ins_pipe(pipe_slow);
 258 %}
 259 
 260 instruct storeV_partial(vReg src, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{
       // Store counterpart of loadV_partial: memory size above 16 bytes and below
       // MaxVectorSize. The element type and lane count are read from the src operand,
       // pgtmp holds the lane-count predicate and cr is declared as killed.
 261   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 &&
 262             n->as_StoreVector()->memory_size() < MaxVectorSize);
 263   match(Set mem (StoreVector mem src));
 264   effect(TEMP pgtmp, KILL cr);
 265   ins_cost(5 * SVE_COST);
 266   format %{ "sve_ptrue $pgtmp, vector_length\n\t"
 267             "sve_str $src, $pgtmp, $mem\t# store vector partial" %}
 268   ins_encode %{
 269     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 270     __ sve_ptrue_lanecnt(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),
 271                          Matcher::vector_length(this, $src));
 272     FloatRegister src_reg = as_FloatRegister($src$$reg);
 273     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg,
 274                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
 275                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 276   %}
 277   ins_pipe(pipe_slow);
 278 %}
 279 
 280 // vector load/store - predicated
 281 
     // loadV_masked: LoadVectorMasked with the predicate taken from the pg input.
     // Unlike loadV, the rule predicate places no restriction on the memory size.
 282 instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{
 283   predicate(UseSVE > 0);
 284   match(Set dst (LoadVectorMasked mem pg));
 285   ins_cost(4 * SVE_COST);
 286   format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated (sve)" %}
 287   ins_encode %{
 288     BasicType bt = Matcher::vector_element_basic_type(this);
 289     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg),
 290                           as_PRegister($pg$$reg), bt, bt, $mem->opcode(),
 291                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 292   %}
 293   ins_pipe(pipe_slow);
 294 %}
 295 
 296 instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
       // StoreVectorMasked: stores src under the predicate pg. The element type is
       // read from the src operand and used for both memory and vector elements.
 297   predicate(UseSVE > 0);
 298   match(Set mem (StoreVectorMasked mem (Binary src pg)));
 299   ins_cost(4 * SVE_COST);
 300   format %{ "sve_str $mem, $pg, $src\t# store vector predicated (sve)" %}
 301   ins_encode %{
 302     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 303     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg),
 304                           as_PRegister($pg$$reg), bt, bt, $mem->opcode(),
 305                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 306   %}
 307   ins_pipe(pipe_slow);
 308 %}
 309 
 310 dnl
 311 dnl MASK_LOGICAL_OP($1,        $2,      $3  )
 312 dnl MASK_LOGICAL_OP(insn_name, op_name, insn)
 313 define(`MASK_LOGICAL_OP', `
     // $2 of the predicate registers pn and pm into pd, emitted as $3 with ptrue
     // passed as the predicate argument.
 314 instruct vmask_$1(pRegGov pd, pRegGov pn, pRegGov pm) %{
 315   predicate(UseSVE > 0);
 316   match(Set pd ($2 pn pm));
 317   ins_cost(SVE_COST);
 318   format %{ "$3 $pd, $pn, $pm\t# predicate (sve)" %}
 319   ins_encode %{
 320     __ $3(as_PRegister($pd$$reg), ptrue,
 321                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
 322   %}
 323   ins_pipe(pipe_slow);
 324 %}')dnl
 325 dnl
 326 // mask logical and/or/xor
 327 MASK_LOGICAL_OP(and, AndVMask, sve_and)
 328 MASK_LOGICAL_OP(or, OrVMask, sve_orr)
 329 MASK_LOGICAL_OP(xor, XorVMask, sve_eor)
 330 
 331 dnl
 332 dnl MASK_LOGICAL_AND_NOT($1,   $2  )
 333 dnl MASK_LOGICAL_AND_NOT(type, size)
 334 define(`MASK_LOGICAL_AND_NOT', `
     // and_not on predicate registers. The NOT is recognised as an XorVMask of pm
     // with MaskAll of the m1 immediate (imm$1_M1, presumably all ones) and the whole
     // pattern is emitted as one sve_bic with ptrue as the predicate argument.
 335 instruct vmask_and_not$1(pRegGov pd, pRegGov pn, pRegGov pm, imm$1_M1 m1) %{
 336   predicate(UseSVE > 0);
 337   match(Set pd (AndVMask pn (XorVMask pm (MaskAll m1))));
 338   ins_cost(SVE_COST);
 339   format %{ "sve_bic $pd, $pn, $pm\t# predicate (sve) ($2)" %}
 340   ins_encode %{
 341     __ sve_bic(as_PRegister($pd$$reg), ptrue,
 342                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
 343   %}
 344   ins_pipe(pipe_slow);
 345 %}')dnl
 346 dnl
 347 // mask logical and_not
 348 MASK_LOGICAL_AND_NOT(I, B/H/S)
 349 MASK_LOGICAL_AND_NOT(L, D)
 350 
 351 // vector reinterpret
 352 
     // reinterpret: source and result have the same length in bytes, so the rule
     // matches with dst as both input and output, emits nothing and has cost 0.
 353 instruct reinterpret(vReg dst) %{
 354   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() ==
 355                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src == dst
 356   match(Set dst (VectorReinterpret dst));
 357   ins_cost(0);
 358   format %{ "# reinterpret $dst\t# do nothing" %}
 359   ins_encode %{
 360     // empty
 361   %}
 362   ins_pipe(pipe_class_empty);
 363 %}
 364 
 365 instruct reinterpretResize(vReg dst, vReg src, pRegGov pgtmp, rFlagsReg cr) %{
       // Reinterpret between vectors whose lengths in bytes differ. pgtmp is set for
       // the smaller of the two byte lengths, dst is cleared with sve_dup of 0 and
       // sve_sel then combines src and the cleared dst under pgtmp.
       // NOTE(review): the zero fill of the remaining bytes relies on sve_sel taking
       // unselected lanes from its last operand - confirm against the assembler.
 366   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() !=
 367                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src != dst
 368   match(Set dst (VectorReinterpret src));
 369   effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
 370   ins_cost(3 * SVE_COST);
 371   format %{ "reinterpretResize $dst, $src\t# vector (sve)" %}
 372   ins_encode %{
 373     uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
 374     uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
 375     uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ?
 376                                   length_in_bytes_src : length_in_bytes_dst;
 377     assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
 378            "invalid vector length");
 379     __ sve_ptrue_lanecnt(as_PRegister($pgtmp$$reg), __ B, length_in_bytes_resize);
 380     __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0);
 381     __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pgtmp$$reg),
 382                as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg));
 383   %}
 384   ins_pipe(pipe_slow);
 385 %}
 386 
 387 // vector mask reinterpret
 388 
     // Mask reinterpret where both the lane count and the length in bytes are
     // unchanged: the predicate register is reused and nothing is emitted (cost 0).
 389 instruct vmask_reinterpret_same_esize(pRegGov dst_src) %{
 390   predicate(UseSVE > 0 &&
 391             n->as_Vector()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
 392             n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
 393   match(Set dst_src (VectorReinterpret dst_src));
 394   ins_cost(0);
 395   format %{ "# vmask_reinterpret $dst_src\t# do nothing" %}
 396   ins_encode %{
 397     // empty
 398   %}
 399   ins_pipe(pipe_class_empty);
 400 %}
 401 
 402 instruct vmask_reinterpret_diff_esize(pRegGov dst, pRegGov src, vReg tmp, rFlagsReg cr) %{
       // Mask reinterpret where the lane count changes but the length in bytes does
       // not. sve_cpy is called on tmp with immediate -1 and src as predicate at the
       // source element size; sve_cmp then tests tmp for equality with -1 at the
       // destination element size under ptrue and writes the result to dst.
 403   predicate(UseSVE > 0 &&
 404             n->as_Vector()->length() != n->in(1)->bottom_type()->is_vect()->length() &&
 405             n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
 406   match(Set dst (VectorReinterpret src));
 407   effect(TEMP tmp, KILL cr);
 408   ins_cost(2 * SVE_COST);
 409   format %{ "# vmask_reinterpret $dst, $src\t# vector (sve)" %}
 410   ins_encode %{
 411     BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
 412     Assembler::SIMD_RegVariant from_size = __ elemType_to_regVariant(from_bt);
 413     BasicType to_bt = Matcher::vector_element_basic_type(this);
 414     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
 415     __ sve_cpy(as_FloatRegister($tmp$$reg), from_size, as_PRegister($src$$reg), -1, false);
 416     __ sve_cmp(Assembler::EQ, as_PRegister($dst$$reg), to_size, ptrue, as_FloatRegister($tmp$$reg), -1);
 417   %}
 418   ins_pipe(pipe_slow);
 419 %}
 420 dnl
 421 dnl UNARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4  )
 422 dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, insn)
 423 define(`UNARY_OP_TRUE_PREDICATE', `
     // Unpredicated $2: only matches nodes that are not predicated vectors and
     // passes ptrue to $4. When the rule name is vnegI the element size is taken
     // from the element type of the node instead of the fixed size argument.
 424 instruct $1(vReg dst, vReg src) %{
 425   predicate(UseSVE > 0 &&
 426             !n->as_Vector()->is_predicated_vector());
 427   match(Set dst ($2 src));
 428   ins_cost(SVE_COST);
 429   format %{ "$4 $dst, $src\t# vector (sve) ($3)" %}
 430   ins_encode %{dnl
 431 ifelse($1, `vnegI', `
 432     BasicType bt = Matcher::vector_element_basic_type(this);', `')
 433     __ $4(as_FloatRegister($dst$$reg), ifelse($1, `vnegI', `__ elemType_to_regVariant(bt)', `__ $3'),
 434          ptrue, as_FloatRegister($src$$reg));
 435   %}
 436   ins_pipe(pipe_slow);
 437 %}')dnl
 438 dnl
 439 
 440 // vector abs
 441 UNARY_OP_TRUE_PREDICATE(vabsB, AbsVB, B, sve_abs)
 442 UNARY_OP_TRUE_PREDICATE(vabsS, AbsVS, H, sve_abs)
 443 UNARY_OP_TRUE_PREDICATE(vabsI, AbsVI, S, sve_abs)
 444 UNARY_OP_TRUE_PREDICATE(vabsL, AbsVL, D, sve_abs)
 445 UNARY_OP_TRUE_PREDICATE(vabsF, AbsVF, S, sve_fabs)
 446 UNARY_OP_TRUE_PREDICATE(vabsD, AbsVD, D, sve_fabs)
 447 
 448 dnl UNARY_OP_PREDICATE($1,        $2,      $3,   $4  )
 449 dnl UNARY_OP_PREDICATE(insn_name, op_name, size, insn)
 450 define(`UNARY_OP_PREDICATE', `
     // Predicated $2: dst_src is both the input and the result and pg is the
     // predicate passed to $4. The rule is named after the unpredicated one with a
     // _masked suffix.
 451 instruct $1_masked(vReg dst_src, pRegGov pg) %{
 452   predicate(UseSVE > 0);
 453   match(Set dst_src ($2 dst_src pg));
 454   ins_cost(SVE_COST);
 455   format %{ "$4 $dst_src, $pg, $dst_src\t# vector (sve) ($3)" %}
 456   ins_encode %{dnl
 457 ifelse($1, `vnegI', `
 458     BasicType bt = Matcher::vector_element_basic_type(this);', `')
 459     __ $4(as_FloatRegister($dst_src$$reg), ifelse($1, `vnegI', `__ elemType_to_regVariant(bt)', `__ $3'),
 460             as_PRegister($pg$$reg),
 461             as_FloatRegister($dst_src$$reg));
 462   %}
 463   ins_pipe(pipe_slow);
 464 %}')dnl
 465 // vector abs - predicated
 466 UNARY_OP_PREDICATE(vabsB, AbsVB, B, sve_abs)
 467 UNARY_OP_PREDICATE(vabsS, AbsVS, H, sve_abs)
 468 UNARY_OP_PREDICATE(vabsI, AbsVI, S, sve_abs)
 469 UNARY_OP_PREDICATE(vabsL, AbsVL, D, sve_abs)
 470 UNARY_OP_PREDICATE(vabsF, AbsVF, S, sve_fabs)
 471 UNARY_OP_PREDICATE(vabsD, AbsVD, D, sve_fabs)
 472 
 473 dnl
 474 dnl BINARY_OP_UNPREDICATE($1,        $2,      $3,   $4,          $5  )
 475 dnl BINARY_OP_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
 476 define(`BINARY_OP_UNPREDICATE', `
     // Unpredicated $2 of src1 and src2 into dst, emitted as $5 at element size $3.
     // The emitter call takes no predicate argument.
 477 instruct $1(vReg dst, vReg src1, vReg src2) %{
 478   predicate(UseSVE > 0);
 479   match(Set dst ($2 src1 src2));
 480   ins_cost(SVE_COST);
 481   format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
 482   ins_encode %{
 483     __ $5(as_FloatRegister($dst$$reg), __ $3,
 484          as_FloatRegister($src1$$reg),
 485          as_FloatRegister($src2$$reg));
 486   %}
 487   ins_pipe(pipe_slow);
 488 %}')dnl
 489 dnl
 490 dnl
 491 dnl BINARY_OP_PREDICATE($1,        $2,      $3,   $4  )
 492 dnl BINARY_OP_PREDICATE(insn_name, op_name, size, insn)
 493 define(`BINARY_OP_PREDICATE', `
     // Predicated $2: dst_src1 is both the first input and the result, src2 is the
     // second input and pg is the predicate passed to $4.
 494 instruct $1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
 495   predicate(UseSVE > 0);
 496   match(Set dst_src1 ($2 (Binary dst_src1 src2) pg));
 497   ins_cost(SVE_COST);
 498   format %{ "$4 $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) ($3)" %}
 499   ins_encode %{
 500     __ $4(as_FloatRegister($dst_src1$$reg), __ $3,
 501             as_PRegister($pg$$reg),
 502             as_FloatRegister($src2$$reg));
 503   %}
 504   ins_pipe(pipe_slow);
 505 %}')dnl
 506 dnl
 507 // vector add
     // The fourth column (16, 8, 4, 2) is the min_vec_len argument; the expansion of
     // the unpredicated rule does not reference it.
 508 BINARY_OP_UNPREDICATE(vaddB, AddVB, B, 16, sve_add)
 509 BINARY_OP_UNPREDICATE(vaddS, AddVS, H, 8,  sve_add)
 510 BINARY_OP_UNPREDICATE(vaddI, AddVI, S, 4,  sve_add)
 511 BINARY_OP_UNPREDICATE(vaddL, AddVL, D, 2,  sve_add)
 512 BINARY_OP_UNPREDICATE(vaddF, AddVF, S, 4,  sve_fadd)
 513 BINARY_OP_UNPREDICATE(vaddD, AddVD, D, 2,  sve_fadd)
 514 
 515 // vector add - predicated
 516 BINARY_OP_PREDICATE(vaddB, AddVB, B, sve_add)
 517 BINARY_OP_PREDICATE(vaddS, AddVS, H, sve_add)
 518 BINARY_OP_PREDICATE(vaddI, AddVI, S, sve_add)
 519 BINARY_OP_PREDICATE(vaddL, AddVL, D, sve_add)
 520 BINARY_OP_PREDICATE(vaddF, AddVF, S, sve_fadd)
 521 BINARY_OP_PREDICATE(vaddD, AddVD, D, sve_fadd)
 522 dnl
 523 dnl ADD_IMM($1,          $2,   $3      )
 524 dnl ADD_IMM(name_suffix, size, imm_type)
 525 define(`ADD_IMM', `
     // AddV$1 of dst_src with a replicated constant. A positive constant is emitted
     // as sve_add, a negative one as sve_sub of its negation, and a zero constant
     // emits no instruction.
 526 instruct vaddImm$1(vReg dst_src, $3 con) %{
 527   predicate(UseSVE > 0);
 528   match(Set dst_src (AddV$1 dst_src (Replicate$1 con)));
 529   ins_cost(SVE_COST);
 530   format %{ "sve_add $dst_src, $dst_src, $con\t # vector (sve) ($2)" %}
 531   ins_encode %{
 532     int32_t val = $con$$constant;
 533     if (val > 0){
 534       __ sve_add(as_FloatRegister($dst_src$$reg), __ $2, val);
 535     } else if (val < 0){
 536       __ sve_sub(as_FloatRegister($dst_src$$reg), __ $2, -val);
 537     }
 538   %}
 539   ins_pipe(pipe_slow);
 540 %}')dnl
 541 
 542 // vector add reg imm (unpredicated)
 543 ADD_IMM(B, B, immBAddSubV)
 544 ADD_IMM(S, H, immIAddSubV)
 545 ADD_IMM(I, S, immIAddSubV)
 546 ADD_IMM(L, D, immLAddSubV)
 547 dnl
 548 dnl BITWISE_OP_IMM($1,        $2,       $3,   $4,   $5      )
 549 dnl BITWISE_OP_IMM(insn_name, op_name1, size, type, op_name2)
 550 define(`BITWISE_OP_IMM', `
     // $2 of dst_src with a replicated constant of operand type imm$4Log, emitted as
     // $5 at element size $3 with the constant cast to uint64_t.
 551 instruct $1(vReg dst_src, imm$4Log con) %{
 552   predicate(UseSVE > 0);
 553   match(Set dst_src ($2 dst_src (Replicate$4 con)));
 554   ins_cost(SVE_COST);
 555   format %{ "$5 $dst_src, $dst_src, $con\t # vector (sve) ($3)" %}
 556   ins_encode %{
 557     __ $5(as_FloatRegister($dst_src$$reg), __ $3,
 558          (uint64_t)($con$$constant));
 559   %}
 560   ins_pipe(pipe_slow);
 561 %}')dnl
 562 
 563 // vector binary op reg imm (unpredicated)
 564 BITWISE_OP_IMM(vandB, AndV, B, B, sve_and)
 565 BITWISE_OP_IMM(vandH, AndV, H, S, sve_and)
 566 BITWISE_OP_IMM(vandS, AndV, S, I, sve_and)
 567 BITWISE_OP_IMM(vandD, AndV, D, L, sve_and)
 568 BITWISE_OP_IMM(vorB,  OrV,  B, B, sve_orr)
 569 BITWISE_OP_IMM(vorH,  OrV,  H, S, sve_orr)
 570 BITWISE_OP_IMM(vorS,  OrV,  S, I, sve_orr)
 571 BITWISE_OP_IMM(vorD,  OrV,  D, L, sve_orr)
 572 BITWISE_OP_IMM(vxorB, XorV, B, B, sve_eor)
 573 BITWISE_OP_IMM(vxorH, XorV, H, S, sve_eor)
 574 BITWISE_OP_IMM(vxorS, XorV, S, I, sve_eor)
 575 BITWISE_OP_IMM(vxorD, XorV, D, L, sve_eor)
 576 dnl
 577 dnl
 578 dnl BINARY_OP_UNSIZED($1,        $2,      $3  )
 579 dnl BINARY_OP_UNSIZED(insn_name, op_name, insn)
 580 define(`BINARY_OP_UNSIZED', `
     // Unpredicated $2 of src1 and src2 into dst, emitted as $3. The emitter call
     // takes neither an element size nor a predicate argument.
 581 instruct $1(vReg dst, vReg src1, vReg src2) %{
 582   predicate(UseSVE > 0);
 583   match(Set dst ($2 src1 src2));
 584   ins_cost(SVE_COST);
 585   format %{ "$3  $dst, $src1, $src2\t# vector (sve)" %}
 586   ins_encode %{
 587     __ $3(as_FloatRegister($dst$$reg),
 588          as_FloatRegister($src1$$reg),
 589          as_FloatRegister($src2$$reg));
 590   %}
 591   ins_pipe(pipe_slow);
 592 %}')dnl
 593 dnl
 594 // vector and
 595 BINARY_OP_UNSIZED(vand, AndV, sve_and)
 596 
 597 // vector or
 598 BINARY_OP_UNSIZED(vor, OrV, sve_orr)
 599 
 600 // vector xor
 601 BINARY_OP_UNSIZED(vxor, XorV, sve_eor)
 602 
 603 dnl BINARY_LOGIC_OP_PREDICATE($1,        $2,      $3  )
 604 dnl BINARY_LOGIC_OP_PREDICATE(insn_name, op_name, insn)
 605 define(`BINARY_LOGIC_OP_PREDICATE', `
     // Predicated $2: dst_src1 is both the first input and the result and pg is the
     // predicate passed to $3. The element size is derived from the element type of
     // the node at code emission time.
 606 instruct $1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
 607   predicate(UseSVE > 0);
 608   match(Set dst_src1 ($2 (Binary dst_src1 src2) pg));
 609   ins_cost(SVE_COST);
 610   format %{ "$3 $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %}
 611   ins_encode %{
 612     BasicType bt = Matcher::vector_element_basic_type(this);
 613     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 614     __ $3(as_FloatRegister($dst_src1$$reg), size,
 615           as_PRegister($pg$$reg),
 616           as_FloatRegister($src2$$reg));
 617   %}
 618   ins_pipe(pipe_slow);
 619 %}')dnl
 620 dnl
 621 // vector and - predicated
 622 BINARY_LOGIC_OP_PREDICATE(vand, AndV, sve_and)
 623 
 624 // vector or - predicated
 625 BINARY_LOGIC_OP_PREDICATE(vor, OrV, sve_orr)
 626 
 627 // vector xor - predicated
 628 BINARY_LOGIC_OP_PREDICATE(vxor, XorV, sve_eor)
 629 
 630 // vector not
 631 dnl
 632 define(`MATCH_RULE', `ifelse($1, I,
 633 `match(Set dst (XorV src (ReplicateB m1)));
 634   match(Set dst (XorV src (ReplicateS m1)));
 635   match(Set dst (XorV src (ReplicateI m1)));',
 636 `match(Set dst (XorV src (ReplicateL m1)));')')dnl
 637 dnl
 638 define(`VECTOR_NOT', `
     // Bitwise NOT recognised as XorV of src with a replicated m1 immediate
     // (imm$1_M1). sve_not is always called with element size D and with ptrue as
     // the predicate argument.
 639 instruct vnot$1`'(vReg dst, vReg src, imm$1_M1 m1) %{
 640   predicate(UseSVE > 0);
 641   MATCH_RULE($1)
 642   ins_cost(SVE_COST);
 643   format %{ "sve_not $dst, $src\t# vector (sve) $2" %}
 644   ins_encode %{
 645     __ sve_not(as_FloatRegister($dst$$reg), __ D,
 646                ptrue, as_FloatRegister($src$$reg));
 647   %}
 648   ins_pipe(pipe_slow);
 649 %}')dnl
 650 dnl        $1,$2
 651 VECTOR_NOT(I, B/H/S)
 652 VECTOR_NOT(L, D)
     dnl The name is quoted so that m4 removes the helper instead of expanding it first.
 653 undefine(`MATCH_RULE')
 654 dnl
 655 // vector not - predicated
 656 dnl
 657 define(`MATCH_RULE', `ifelse($1, I,
 658 `match(Set dst_src (XorV (Binary dst_src (ReplicateB m1)) pg));
 659   match(Set dst_src (XorV (Binary dst_src (ReplicateS m1)) pg));
 660   match(Set dst_src (XorV (Binary dst_src (ReplicateI m1)) pg));',
 661 `match(Set dst_src (XorV (Binary dst_src (ReplicateL m1)) pg));')')dnl
 662 dnl
 663 define(`VECTOR_NOT_PREDICATE', `
     // Predicated bitwise NOT, recognised as a masked XorV with a replicated m1
     // immediate. Input and result share one register (dst_src), as in the other
     // masked rules of this file, so that lanes not selected by pg keep the input
     // value when the predicated sve_not leaves them untouched.
 664 instruct vnot$1_masked`'(vReg dst_src, imm$1_M1 m1, pRegGov pg) %{
 665   predicate(UseSVE > 0);
 666   MATCH_RULE($1)
 667   ins_cost(SVE_COST);
 668   format %{ "sve_not $dst_src, $pg, $dst_src\t# vector (sve) $2" %}
 669   ins_encode %{
 670     BasicType bt = Matcher::vector_element_basic_type(this);
 671     __ sve_not(as_FloatRegister($dst_src$$reg), __ elemType_to_regVariant(bt),
 672                as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
 673   %}
 674   ins_pipe(pipe_slow);
 675 %}')dnl
 676 dnl                 $1, $2
 677 VECTOR_NOT_PREDICATE(I, B/H/S)
 678 VECTOR_NOT_PREDICATE(L, D)
     dnl The name is quoted so that m4 removes the helper instead of expanding it first.
 679 undefine(`MATCH_RULE')
 680 dnl
 681 // vector and_not
 682 dnl
 683 define(`MATCH_RULE', `ifelse($1, I,
 684 `match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
 685   match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
 686   match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
 687 `match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
 688 dnl
 689 define(`VECTOR_AND_NOT', `
     // AndV of src1 with the XorV of src2 and a replicated m1 immediate (imm$1_M1),
     // emitted as one sve_bic. The emitter call takes neither an element size nor a
     // predicate argument.
 690 instruct vand_not$1`'(vReg dst, vReg src1, vReg src2, imm$1_M1 m1) %{
 691   predicate(UseSVE > 0);
 692   MATCH_RULE($1)
 693   ins_cost(SVE_COST);
 694   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) $2" %}
 695   ins_encode %{
 696     __ sve_bic(as_FloatRegister($dst$$reg),
 697                as_FloatRegister($src1$$reg),
 698                as_FloatRegister($src2$$reg));
 699   %}
 700   ins_pipe(pipe_slow);
 701 %}')dnl
 702 dnl            $1,$2
 703 VECTOR_AND_NOT(I, B/H/S)
 704 VECTOR_AND_NOT(L, D)
     dnl The name is quoted so that m4 removes the helper instead of expanding it first.
 705 undefine(`MATCH_RULE')
 706 dnl
 707 // vector and_not - predicated
 708 dnl
 709 define(`MATCH_RULE', `ifelse($1, I,
 710 `match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (ReplicateB m1))) pg));
 711   match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (ReplicateS m1))) pg));
 712   match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (ReplicateI m1))) pg));',
 713 `match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (ReplicateL m1))) pg));')')dnl
 714 dnl
 715 define(`VECTOR_AND_NOT_PREDICATE', `
     // Predicated and_not: dst_src1 is both the first input and the result. sve_bic
     // receives the element size derived from the element type of the node and pg as
     // the predicate.
 716 instruct vand_not$1_masked`'(vReg dst_src1, vReg src2, imm$1_M1 m1, pRegGov pg) %{
 717   predicate(UseSVE > 0);
 718   MATCH_RULE($1)
 719   ins_cost(SVE_COST);
 720   format %{ "sve_bic $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) $2" %}
 721   ins_encode %{
 722     BasicType bt = Matcher::vector_element_basic_type(this);
 723     __ sve_bic(as_FloatRegister($dst_src1$$reg), __ elemType_to_regVariant(bt),
 724                as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
 725   %}
 726   ins_pipe(pipe_slow);
 727 %}')dnl
 728 dnl                     $1, $2
 729 VECTOR_AND_NOT_PREDICATE(I, B/H/S)
 730 VECTOR_AND_NOT_PREDICATE(L, D)
     dnl The name is quoted so that m4 removes the helper instead of expanding it first.
 731 undefine(`MATCH_RULE')
 732 dnl
 733 dnl VDIVF($1,          $2  , $3         )
 734 dnl VDIVF(name_suffix, size, min_vec_len)
 735 define(`VDIVF', `
     // Unpredicated DivV$1: dst_src1 is both the first input and the result and
     // ptrue is passed to sve_fdiv as the predicate. The third argument of this
     // template (min_vec_len) is not referenced by the expansion.
 736 instruct vdiv$1(vReg dst_src1, vReg src2) %{
 737   predicate(UseSVE > 0);
 738   match(Set dst_src1 (DivV$1 dst_src1 src2));
 739   ins_cost(SVE_COST);
 740   format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
 741   ins_encode %{
 742     __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
 743          ptrue, as_FloatRegister($src2$$reg));
 744   %}
 745   ins_pipe(pipe_slow);
 746 %}')dnl
 747 dnl
 748 // vector float div
 749 VDIVF(F, S, 4)
 750 VDIVF(D, D, 2)
 751 
 752 // vector float div - predicated
 753 BINARY_OP_PREDICATE(vfdivF, DivVF, S, sve_fdiv)
 754 BINARY_OP_PREDICATE(vfdivD, DivVD, D, sve_fdiv)
 755 
 756 dnl
 757 dnl VMINMAX($1     , $2, $3   , $4  )
 758 dnl VMINMAX(op_name, op, finsn, insn)
 759 define(`VMINMAX', `
     // Unpredicated $2: dst_src1 is both the first input and the result and ptrue is
     // the predicate argument. The floating point emitter ($3) or the integer one
     // ($4) is chosen from the element type of the node at code emission time.
 760 instruct v$1(vReg dst_src1, vReg src2) %{
 761   predicate(UseSVE > 0);
 762   match(Set dst_src1 ($2 dst_src1 src2));
 763   ins_cost(SVE_COST);
 764   format %{ "sve_$1 $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
 765   ins_encode %{
 766     BasicType bt = Matcher::vector_element_basic_type(this);
 767     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 768     if (is_floating_point_type(bt)) {
 769       __ $3(as_FloatRegister($dst_src1$$reg), size,
 770                   ptrue, as_FloatRegister($src2$$reg));
 771     } else {
 772       assert(is_integral_type(bt), "unsupported type");
 773       __ $4(as_FloatRegister($dst_src1$$reg), size,
 774                   ptrue, as_FloatRegister($src2$$reg));
 775     }
 776   %}
 777   ins_pipe(pipe_slow);
 778 %}')dnl
 779 dnl
 780 // vector min/max
 781 VMINMAX(min, MinV, sve_fmin, sve_smin)
 782 VMINMAX(max, MaxV, sve_fmax, sve_smax)
 783 
 784 dnl
dnl VMINMAX_PREDICATE($1     , $2, $3   , $4  )
dnl VMINMAX_PREDICATE(op_name, op, finsn, insn)
define(`VMINMAX_PREDICATE', `
// dst_src1 = $1 of dst_src1 and src2, governed by the mask operand pg
instruct v$1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
  predicate(UseSVE > 0);
  match(Set dst_src1 ($2 (Binary dst_src1 src2) pg));
  ins_cost(SVE_COST);
  format %{ "sve_$1 $dst_src1, $pg, $dst_src1, $src2\t# vector (sve)" %}
  ins_encode %{
    // Same type dispatch as the unmasked rule, except that the pg operand
    // is passed as the governing predicate in place of ptrue.
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
    if (is_floating_point_type(bt)) {
      __ $3(as_FloatRegister($dst_src1$$reg), size,
                  as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
    } else {
      assert(is_integral_type(bt), "unsupported type");
      __ $4(as_FloatRegister($dst_src1$$reg), size,
                  as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
    }
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector min/max - predicated
VMINMAX_PREDICATE(min, MinV, sve_fmin, sve_smin)
VMINMAX_PREDICATE(max, MaxV, sve_fmax, sve_smax)
 811 
 812 dnl
 813 dnl VFMLA($1           $2  )
 814 dnl VFMLA(name_suffix, size)
 815 define(`VFMLA', `
 816 // dst_src1 = dst_src1 + src2 * src3
 817 instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
 818   predicate(UseFMA && UseSVE > 0);
 819   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
 820   ins_cost(SVE_COST);
 821   format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 822   ins_encode %{
 823     __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
 824          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 825   %}
 826   ins_pipe(pipe_slow);
 827 %}')dnl
 828 dnl
 829 // vector fmla
 830 VFMLA(F, S)
 831 VFMLA(D, D)
 832 
 833 dnl
 834 dnl VFMAD_PREDICATE($1           $2  )
 835 dnl VFMAD_PREDICATE(name_suffix, size)
 836 define(`VFMAD_PREDICATE', `
 837 // dst_src1 = dst_src1 * src2 + src3
 838 instruct vfmad$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
 839   predicate(UseFMA && UseSVE > 0);
 840   match(Set dst_src1 (FmaV$1 (Binary dst_src1 src2) (Binary src3 pg)));
 841   ins_cost(SVE_COST);
 842   format %{ "sve_fmad $dst_src1, $pg, $src2, $src3\t# vector (sve) ($2)" %}
 843   ins_encode %{
 844     __ sve_fmad(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
 845          as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 846   %}
 847   ins_pipe(pipe_slow);
 848 %}')dnl
 849 dnl
 850 // vector fmad - predicated
 851 VFMAD_PREDICATE(F, S)
 852 VFMAD_PREDICATE(D, D)
 853 
 854 dnl
 855 dnl VFMLS1($1           $2  )
 856 dnl VFMLS1(name_suffix, size)
 857 define(`VFMLS1', `
 858 // dst_src1 = dst_src1 + -src2 * src3
 859 // The NegV$1 must not be predicated.
 860 instruct vfmls`$1'1(vReg dst_src1, vReg src2, vReg src3) %{
 861   predicate(UseFMA && UseSVE > 0 &&
 862             !n->in(2)->in(1)->as_Vector()->is_predicated_vector());
 863   match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
 864   ins_cost(SVE_COST);
 865   format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 866   ins_encode %{
 867     __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
 868          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 869   %}
 870   ins_pipe(pipe_slow);
 871 %}')dnl
 872 dnl
 873 dnl VFMLS2($1           $2  )
 874 dnl VFMLS2(name_suffix, size)
 875 define(`VFMLS2', `
 876 // dst_src1 = dst_src1 + src2 * -src3
 877 // The NegV$1 must not be predicated.
 878 instruct vfmls`$1'2(vReg dst_src1, vReg src2, vReg src3) %{
 879   predicate(UseFMA && UseSVE > 0 &&
 880             !n->in(2)->in(2)->as_Vector()->is_predicated_vector());
 881   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
 882   ins_cost(SVE_COST);
 883   format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 884   ins_encode %{
 885     __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
 886          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 887   %}
 888   ins_pipe(pipe_slow);
 889 %}')dnl
 890 dnl
 891 // vector fmls
 892 VFMLS1(F, S)
 893 VFMLS2(F, S)
 894 VFMLS1(D, D)
 895 VFMLS2(D, D)
 896 
 897 dnl
 898 dnl VFMSB_PREDICATE($1           $2  )
 899 dnl VFMSB_PREDICATE(name_suffix, size)
 900 define(`VFMSB_PREDICATE', `
 901 // dst_src1 = dst_src1 * -src2 + src3
 902 // The NegV$1 must not be predicated.
 903 instruct vfmsb$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
 904   predicate(UseFMA && UseSVE > 0 &&
 905             !n->in(1)->in(2)->as_Vector()->is_predicated_vector());
 906   match(Set dst_src1 (FmaV$1 (Binary dst_src1 (NegV$1 src2)) (Binary src3 pg)));
 907   ins_cost(SVE_COST);
 908   format %{ "sve_fmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
 909   ins_encode %{
 910     __ sve_fmsb(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
 911          as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 912   %}
 913   ins_pipe(pipe_slow);
 914 %}')dnl
 915 dnl
 916 // vector fmsb - predicated
 917 VFMSB_PREDICATE(F, S)
 918 VFMSB_PREDICATE(D, D)
 919 
 920 dnl
 921 dnl VFNMLA1($1           $2  )
 922 dnl VFNMLA1(name_suffix, size)
 923 define(`VFNMLA1', `
 924 // dst_src1 = -dst_src1 + -src2 * src3
 925 // The NegV$1 must not be predicated.
 926 instruct vfnmla`$1'1(vReg dst_src1, vReg src2, vReg src3) %{
 927   predicate(UseFMA && UseSVE > 0 &&
 928             !n->in(1)->as_Vector()->is_predicated_vector() &&
 929             !n->in(2)->in(1)->as_Vector()->is_predicated_vector());
 930   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
 931   ins_cost(SVE_COST);
 932   format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 933   ins_encode %{
 934     __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
 935          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 936   %}
 937   ins_pipe(pipe_slow);
 938 %}')dnl
 939 dnl
 940 dnl VFNMLA2($1           $2  )
 941 dnl VFNMLA2(name_suffix, size)
 942 define(`VFNMLA2', `
 943 // dst_src1 = -dst_src1 + src2 * -src3
 944 // The NegV$1 must not be predicated.
 945 instruct vfnmla`$1'2(vReg dst_src1, vReg src2, vReg src3) %{
 946   predicate(UseFMA && UseSVE > 0 &&
 947             !n->in(1)->as_Vector()->is_predicated_vector() &&
 948             !n->in(2)->in(2)->as_Vector()->is_predicated_vector());
 949   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
 950   ins_cost(SVE_COST);
 951   format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 952   ins_encode %{
 953     __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
 954          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 955   %}
 956   ins_pipe(pipe_slow);
 957 %}')dnl
 958 dnl
 959 // vector fnmla
 960 VFNMLA1(F, S)
 961 VFNMLA2(F, S)
 962 VFNMLA1(D, D)
 963 VFNMLA2(D, D)
 964 
 965 dnl
 966 dnl VFNMAD_PREDICATE($1           $2  )
 967 dnl VFNMAD_PREDICATE(name_suffix, size)
 968 define(`VFNMAD_PREDICATE', `
 969 // dst_src1 = -src3 + dst_src1 * -src2
 970 // The NegV$1 must not be predicated.
 971 instruct vfnmad$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
 972   predicate(UseFMA && UseSVE > 0 &&
 973             !n->in(1)->in(2)->as_Vector()->is_predicated_vector() &&
 974             !n->in(2)->in(1)->as_Vector()->is_predicated_vector());
 975   match(Set dst_src1 (FmaV$1 (Binary dst_src1 (NegV$1 src2)) (Binary (NegV$1 src3) pg)));
 976   ins_cost(SVE_COST);
 977   format %{ "sve_fnmad $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
 978   ins_encode %{
 979     __ sve_fnmad(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
 980          as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 981   %}
 982   ins_pipe(pipe_slow);
 983 %}')dnl
 984 dnl
 985 // vector fnmad - predicated
 986 VFNMAD_PREDICATE(F, S)
 987 VFNMAD_PREDICATE(D, D)
 988 
 989 dnl
 990 dnl VFNMLS($1           $2  )
 991 dnl VFNMLS(name_suffix, size)
 992 define(`VFNMLS', `
 993 // dst_src1 = -dst_src1 + src2 * src3
 994 // The NegV$1 must not be predicated.
 995 instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
 996   predicate(UseFMA && UseSVE > 0 &&
 997             !n->in(1)->as_Vector()->is_predicated_vector());
 998   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
 999   ins_cost(SVE_COST);
1000   format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
1001   ins_encode %{
1002     __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
1003          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
1004   %}
1005   ins_pipe(pipe_slow);
1006 %}')dnl
1007 dnl
1008 // vector fnmls
1009 VFNMLS(F, S)
1010 VFNMLS(D, D)
1011 
1012 dnl
dnl VFNMSB_PREDICATE($1,          $2  )
dnl VFNMSB_PREDICATE(name_suffix, size)
define(`VFNMSB_PREDICATE', `
// Masked fused multiply-add with a negated addend, governed by pg.
// dst_src1 = -src3 + dst_src1 * src2
// Matches only when the NegV$1 input is not predicated.
instruct vfnmsb$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
  predicate(UseFMA && UseSVE > 0 &&
            !n->in(2)->in(1)->as_Vector()->is_predicated_vector());
  match(Set dst_src1 (FmaV$1 (Binary dst_src1 src2) (Binary (NegV$1 src3) pg)));
  ins_cost(SVE_COST);
  format %{ "sve_fnmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    FloatRegister factor_and_result = as_FloatRegister($dst_src1$$reg);
    PRegister governing = as_PRegister($pg$$reg);
    FloatRegister other_factor = as_FloatRegister($src2$$reg);
    FloatRegister negated_addend = as_FloatRegister($src3$$reg);
    __ sve_fnmsb(factor_and_result, __ $2, governing, other_factor, negated_addend);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fnmsb - predicated
VFNMSB_PREDICATE(F, S)
VFNMSB_PREDICATE(D, D)
1034 
1035 dnl
dnl VMLA($1           $2  )
dnl VMLA(name_suffix, size)
define(`VMLA', `
// Multiply-accumulate with ptrue as governing predicate.
// dst_src1 = dst_src1 + src2 * src3
instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    // dst_src1 is the accumulator and src2 and src3 are the two factors.
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mla
VMLA(B, B)
VMLA(S, H)
VMLA(I, S)
VMLA(L, D)
1058 
1059 dnl
dnl VMLA_PREDICATE($1           $2  )
dnl VMLA_PREDICATE(name_suffix, size)
define(`VMLA_PREDICATE', `
// Masked multiply-accumulate governed by pg.
// dst_src1 = dst_src1 + src2 * src3
instruct vmla$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (AddV$1 (Binary dst_src1 (MulV$1 src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    // dst_src1 is the accumulator and src2 and src3 are the two factors.
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
         as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mla - predicated
VMLA_PREDICATE(B, B)
VMLA_PREDICATE(S, H)
VMLA_PREDICATE(I, S)
VMLA_PREDICATE(L, D)
1082 
1083 dnl
dnl VMLS($1           $2  )
dnl VMLS(name_suffix, size)
define(`VMLS', `
// Multiply-subtract with ptrue as governing predicate.
// dst_src1 = dst_src1 - src2 * src3
instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    // dst_src1 is the minuend and receives the result.
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mls
VMLS(B, B)
VMLS(S, H)
VMLS(I, S)
VMLS(L, D)
1106 
1107 dnl
dnl VMLS_PREDICATE($1           $2  )
dnl VMLS_PREDICATE(name_suffix, size)
define(`VMLS_PREDICATE', `
// Masked multiply-subtract governed by pg.
// dst_src1 = dst_src1 - src2 * src3
instruct vmls$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (SubV$1 (Binary dst_src1 (MulV$1 src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    // dst_src1 is the minuend and receives the result.
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
         as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mls - predicated
VMLS_PREDICATE(B, B)
VMLS_PREDICATE(S, H)
VMLS_PREDICATE(I, S)
VMLS_PREDICATE(L, D)
1130 
1131 dnl
dnl BINARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
define(`BINARY_OP_TRUE_PREDICATE', `
// Two-operand rule emitted with ptrue as governing predicate.
// dst_src1 is both the first source and the destination.
instruct $1(vReg dst_src1, vReg src2) %{
  predicate(UseSVE > 0);
  match(Set dst_src1 ($2 dst_src1 src2));
  ins_cost(SVE_COST);
  format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
  ins_encode %{
    __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
         ptrue, as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mul
// The integral multiply rules are emitted with ptrue as governing predicate.
BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul)
BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8,  sve_mul)
BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4,  sve_mul)
BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2,  sve_mul)
BINARY_OP_UNPREDICATE(vmulF, MulVF, S, 4, sve_fmul)
BINARY_OP_UNPREDICATE(vmulD, MulVD, D, 2, sve_fmul)

// vector mul - predicated
// One masked rule per element size. Integral sizes use sve_mul and the
// floating-point sizes use sve_fmul.
BINARY_OP_PREDICATE(vmulB, MulVB, B, sve_mul)
BINARY_OP_PREDICATE(vmulS, MulVS, H, sve_mul)
BINARY_OP_PREDICATE(vmulI, MulVI, S, sve_mul)
BINARY_OP_PREDICATE(vmulL, MulVL, D, sve_mul)
BINARY_OP_PREDICATE(vmulF, MulVF, S, sve_fmul)
BINARY_OP_PREDICATE(vmulD, MulVD, D, sve_fmul)
1162 
// vector neg
// Integral negation uses sve_neg and floating-point negation uses sve_fneg.
// The NegVI rule is labelled B/H/S, so one rule presumably serves the byte,
// short and int element sizes - confirm against the rule shape it expands from.
UNARY_OP_TRUE_PREDICATE(vnegI, NegVI, B/H/S, sve_neg)
UNARY_OP_TRUE_PREDICATE(vnegL, NegVL, D, sve_neg)
UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, sve_fneg)
UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, sve_fneg)

// vector neg - predicated
// Masked counterparts of the four rules listed for vector neg.
UNARY_OP_PREDICATE(vnegI, NegVI, B/H/S, sve_neg)
UNARY_OP_PREDICATE(vnegL, NegVL, D, sve_neg)
UNARY_OP_PREDICATE(vnegF, NegVF, S, sve_fneg)
UNARY_OP_PREDICATE(vnegD, NegVD, D, sve_fneg)
1174 
1175 dnl
dnl VPOPCOUNT($1,          $2  )
dnl VPOPCOUNT(name_suffix, size)
define(`VPOPCOUNT', `
// Vector popcount with ptrue as governing predicate.
// The rule does not match predicated nodes. The long flavour additionally
// requires the result element type to be T_LONG.
instruct vpopcount$1(vReg dst, vReg src) %{
  predicate(UseSVE > 0 &&
            !n->as_Vector()->is_predicated_vector()`'ifelse($1, `L', ` &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
  match(Set dst (PopCountV$1 src));
  ins_cost(SVE_COST);
  format %{ "sve_cnt $dst, $src\t# vector (sve) ($2)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");dnl
ifelse($1, `I', `
    BasicType bt = Matcher::vector_element_basic_type(this);', `')
    // The int flavour takes the element size from the node type.
    // The long flavour always counts in D sized lanes.
    __ sve_cnt(as_FloatRegister($dst$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
         ptrue, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector popcount
VPOPCOUNT(I, B/H/S)
VPOPCOUNT(L, D)
1199 
// A PopCountVL created by auto-vectorization carries T_INT as its result
// element type, so the 64-bit lane counts have to be narrowed to 32 bits.
// This rule can be folded into the "vpopcountL" rule once the Vector API
// and auto-vectorization agree on the type definition.
instruct vpopcountLI(vReg dst, vReg src, vReg vtmp) %{
  predicate(UseSVE > 0 &&
            !n->as_Vector()->is_predicated_vector() &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (PopCountVL src));
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_cnt $dst, $src\n\t"
            "sve_dup $vtmp, #0\n\t"
            "sve_uzp1 $dst, $dst, $vtmp\t# vector (sve) (S)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");
    FloatRegister counts = as_FloatRegister($dst$$reg);
    FloatRegister input = as_FloatRegister($src$$reg);
    FloatRegister scratch = as_FloatRegister($vtmp$$reg);
    // Count per D lane first, then narrow the D lanes to S lanes in place.
    __ sve_cnt(counts, __ D, ptrue, input);
    __ sve_vector_narrow(counts, __ S, counts, __ D, scratch);
  %}
  ins_pipe(pipe_slow);
%}
1223 
1224 dnl
dnl VPOPCOUNT_PREDICATE($1,          $2  )
dnl VPOPCOUNT_PREDICATE(name_suffix, size)
define(`VPOPCOUNT_PREDICATE', `
// Masked vector popcount governed by pg. dst_src is both source and
// destination. The long flavour additionally requires the result element
// type to be T_LONG.
instruct vpopcount$1_masked(vReg dst_src, pRegGov pg) %{
  predicate(UseSVE > 0`'ifelse($1, `L', ` &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
  match(Set dst_src (PopCountV$1 dst_src pg));
  ins_cost(SVE_COST);
  format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) ($2)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");dnl
ifelse($1, `I', `
    BasicType bt = Matcher::vector_element_basic_type(this);', `')
    // The int flavour takes the element size from the node type.
    // The long flavour always counts in D sized lanes.
    __ sve_cnt(as_FloatRegister($dst_src$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
         as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
// vector popcount - predicated
VPOPCOUNT_PREDICATE(I, B/H/S)
VPOPCOUNT_PREDICATE(L, D)
1246 
1247 // vector blend
1248 
instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorBlend (Binary src1 src2) pg));
  ins_cost(SVE_COST);
  format %{ "sve_sel $dst, $pg, $src2, $src1\t# vector blend (sve)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    PRegister selector = as_PRegister($pg$$reg);
    FloatRegister result = as_FloatRegister($dst$$reg);
    // Operand order matters here: src2 is handed to sve_sel ahead of src1.
    __ sve_sel(result, variant, selector,
               as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1262 
1263 // vector store mask
1264 
instruct vstoremaskB(vReg dst, pRegGov src, immI_1 size) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorStoreMask src size));
  ins_cost(SVE_COST);
  format %{ "vstoremask $dst, $src\t# vector store mask (sve) (B)" %}
  ins_encode %{
    // Byte-sized elements need no narrowing: a single copy of the value 1
    // under the src predicate produces the mask bytes.
    FloatRegister mask_bytes = as_FloatRegister($dst$$reg);
    PRegister mask_pred = as_PRegister($src$$reg);
    __ sve_cpy(mask_bytes, __ B, mask_pred, 1, false);
  %}
  ins_pipe(pipe_slow);
%}
1275 
instruct vstoremask_narrow(vReg dst, pRegGov src, vReg tmp, immI_gt_1 size) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "vstoremask $dst, $src\t# vector store mask (sve) (H/S/D)" %}
  ins_encode %{
    // Materialize the mask at the source element size, then narrow the
    // lanes down to bytes in place.
    Assembler::SIMD_RegVariant wide_variant = __ elemBytes_to_regVariant((int)$size$$constant);
    FloatRegister mask_vec = as_FloatRegister($dst$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_cpy(mask_vec, wide_variant, as_PRegister($src$$reg), 1, false);
    __ sve_vector_narrow(mask_vec, __ B, mask_vec, wide_variant, scratch);
  %}
  ins_pipe(pipe_slow);
%}
1290 
1291 // Combine LoadVector+VectorLoadMask when the vector element type is not T_BYTE
1292 
instruct vloadmask_loadV(pRegGov dst, indirect mem, vReg tmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_Vector()->length_in_bytes() == MaxVectorSize &&
            type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
  match(Set dst (VectorLoadMask (LoadVector mem)));
  effect(TEMP tmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_ld1b $tmp, $mem\n\t"
            "sve_cmpne $dst, $tmp, 0\t# load vector mask (sve) (H/S/D)" %}
  ins_encode %{
    // First read the boolean mask values into tmp, extended to the element
    // type of the result. Then compare against zero to build the predicate.
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    FloatRegister extended = as_FloatRegister($tmp$$reg);
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, extended,
                          ptrue, T_BOOLEAN, elem_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), variant, ptrue, extended, 0);
  %}
  ins_pipe(pipe_slow);
%}
1314 
instruct vloadmask_loadV_partial(pRegGov dst, indirect mem, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_Vector()->length_in_bytes() > 16 &&
            n->as_Vector()->length_in_bytes() < MaxVectorSize &&
            type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
  match(Set dst (VectorLoadMask (LoadVector mem)));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(6 * SVE_COST);
  format %{ "vloadmask_loadV $dst, $mem\t# load vector mask partial (sve) (H/S/D)" %}
  ins_encode %{
    // Only the leading lanes of the register are valid. Build a predicate
    // covering them, read the boolean mask values under it while extending
    // to the result element type, then compare against zero.
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    PRegister valid_lanes = as_PRegister($ptmp$$reg);
    FloatRegister extended = as_FloatRegister($vtmp$$reg);
    __ sve_ptrue_lanecnt(valid_lanes, variant, Matcher::vector_length(this));
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, extended,
                          valid_lanes, T_BOOLEAN, elem_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), variant, ptrue, extended, 0);
  %}
  ins_pipe(pipe_slow);
%}
1337 
1338 // Combine VectorStoreMask+StoreVector when the vector element type is not T_BYTE
1339 
instruct storeV_vstoremask(indirect mem, pRegGov src, vReg tmp, immI_gt_1 esize) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) == MaxVectorSize);
  match(Set mem (StoreVector mem (VectorStoreMask src esize)));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_cpy $tmp, $src, 1\n\t"
            "sve_st1b $tmp, $mem\t# store vector mask (sve) (H/S/D)" %}
  ins_encode %{
    // Turn the src predicate into a vector of ones at the source element
    // size, then write the lanes out as boolean values.
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    assert(type2aelembytes(src_bt) == (int)$esize$$constant, "unsupported type.");
    Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant($esize$$constant);
    FloatRegister mask_vec = as_FloatRegister($tmp$$reg);
    __ sve_cpy(mask_vec, variant, as_PRegister($src$$reg), 1, false);
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, mask_vec,
                          ptrue, T_BOOLEAN, src_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
  ins_pipe(pipe_slow);
%}
1359 
instruct storeV_vstoremask_partial(indirect mem, pRegGov src, vReg vtmp,
                                   immI_gt_1 esize, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVector()->memory_size() > 16 &&
            type2aelembytes(n->as_StoreVector()->vect_type()->element_basic_type()) > 1 &&
            Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) < MaxVectorSize);
  match(Set mem (StoreVector mem (VectorStoreMask src esize)));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  format %{ "storeV_vstoremask $src, $mem\t# store vector mask partial (sve) (H/S/D)" %}
  ins_cost(6 * SVE_COST);
  ins_encode %{
    // Turn the src predicate into a vector of ones, build a predicate that
    // covers only the valid lanes, and write those lanes out as booleans.
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(src_bt);
    FloatRegister mask_vec = as_FloatRegister($vtmp$$reg);
    PRegister valid_lanes = as_PRegister($ptmp$$reg);
    __ sve_cpy(mask_vec, variant, as_PRegister($src$$reg), 1, false);
    __ sve_ptrue_lanecnt(valid_lanes, variant, Matcher::vector_length(this, $src));
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, mask_vec,
                          valid_lanes, T_BOOLEAN, src_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
  ins_pipe(pipe_slow);
%}
1383 dnl
dnl REDUCE_I($1,        $2     )
dnl REDUCE_I(insn_name, op_name)
define(`REDUCE_I', `
// Integral reduction with an int result over a vector that fills
// MaxVectorSize. src1 is the scalar input and src2 the vector input.
instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
  ifelse($2, AddReductionVI,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction (sve) (may extend)" %}
  ins_encode %{
    // The element type is read from the vector input because the result
    // of this node is a scalar. The ideal opcode selects the operation.
    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           ptrue, as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1406 dnl
1407 dnl
dnl REDUCE_L($1,        $2    )
dnl REDUCE_L(insn_name, op_name)
define(`REDUCE_L', `
// Integral reduction with a long result over a vector that fills
// MaxVectorSize. src1 is the scalar input and src2 the vector input.
instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
  ifelse($2, AddReductionVL,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction (sve)" %}
  ins_encode %{
    // The element type is fixed to T_LONG and the ideal opcode selects
    // the operation.
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           ptrue, as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1429 dnl
dnl REDUCE_I_PARTIAL($1,        $2     )
dnl REDUCE_I_PARTIAL(insn_name, op_name)
define(`REDUCE_I_PARTIAL', `
// Integral reduction with an int result over a vector shorter than
// MaxVectorSize. ptmp is set up to cover only the lanes of src2 in use.
instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                             pRegGov ptmp, rFlagsReg cr) %{
  ifelse($2, AddReductionVI,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction partial (sve) (may extend)" %}
  ins_encode %{
    // Build a predicate for the lane count of the vector input, then run
    // the reduction under that predicate in place of ptrue.
    BasicType bt = Matcher::vector_element_basic_type(this, $src2);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
    __ sve_ptrue_lanecnt(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2));
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1455 dnl
dnl REDUCE_L_PARTIAL($1,        $2    )
dnl REDUCE_L_PARTIAL(insn_name, op_name)
define(`REDUCE_L_PARTIAL', `
// Integral reduction with a long result over a vector shorter than
// MaxVectorSize. ptmp is set up to cover only the lanes of src2 in use.
instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                             pRegGov ptmp, rFlagsReg cr) %{
  ifelse($2, AddReductionVL,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction partial (sve)" %}
  ins_encode %{
    // Build a D sized predicate for the lane count of the vector input,
    // then run the reduction under that predicate in place of ptrue.
    __ sve_ptrue_lanecnt(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2));
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1479 dnl
dnl REDUCE_ADDF($1,        $2,      $3,      $4  )
dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
define(`REDUCE_ADDF', `
// Floating-point add reduction over a vector that fills MaxVectorSize.
// src1_dst supplies the scalar input and receives the result.
instruct reduce_$1($3 src1_dst, vReg src2) %{
  predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
  match(Set src1_dst ($2 src1_dst src2));
  ins_cost(SVE_COST);
  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
  ins_encode %{
    FloatRegister scalar_acc = as_FloatRegister($src1_dst$$reg);
    FloatRegister lanes = as_FloatRegister($src2$$reg);
    __ sve_fadda(scalar_acc, __ $4, ptrue, lanes);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1495 dnl
1496 dnl
dnl REDUCE_ADDF_PARTIAL($1,        $2,      $3,      $4  )
dnl REDUCE_ADDF_PARTIAL(insn_name, op_name, reg_dst, size)
define(`REDUCE_ADDF_PARTIAL', `
// Floating-point add reduction over a vector shorter than MaxVectorSize.
// src1_dst supplies the scalar input and receives the result.
instruct reduce_$1_partial($3 src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  match(Set src1_dst ($2 src1_dst src2));
  ins_cost(SVE_COST);
  effect(TEMP ptmp, KILL cr);
  format %{ "sve_reduce_$1 $src1_dst, $src1_dst, $src2\t# $1 reduction partial (sve) ($4)" %}
  ins_encode %{
    // Restrict the reduction to the lanes of the vector input in use.
    PRegister valid_lanes = as_PRegister($ptmp$$reg);
    FloatRegister scalar_acc = as_FloatRegister($src1_dst$$reg);
    FloatRegister lanes = as_FloatRegister($src2$$reg);
    __ sve_ptrue_lanecnt(valid_lanes, __ $4, Matcher::vector_length(this, $src2));
    __ sve_fadda(scalar_acc, __ $4, valid_lanes, lanes);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1514 dnl
1515 dnl
1516 dnl REDUCE_I_PREDICATE($1,        $2     )
1517 dnl REDUCE_I_PREDICATE(insn_name, op_name)
     dnl Emits rule reduce_<insn_name>I_masked: matches <op_name> with inputs
     dnl (Binary src1 src2) and a governing predicate pg, writing an int result.
     dnl For AddReductionVI the rule predicate is only UseSVE > 0. For any other
     dnl op name the predicate also requires the vector element type to differ
     dnl from T_LONG, because this template and REDUCE_L_PREDICATE are
     dnl instantiated with the same op names (for example AndReductionV).
     dnl The element type is read from src2 at encode time and handed to
     dnl sve_reduce_integral together with the ideal opcode, src1, src2, pg and tmp.
1518 define(`REDUCE_I_PREDICATE', `
1519 instruct reduce_$1I_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{
1520   ifelse($2, AddReductionVI,
1521        `predicate(UseSVE > 0);',
1522        `predicate(UseSVE > 0 &&
1523             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG);')
1524   match(Set dst ($2 (Binary src1 src2) pg));
1525   effect(TEMP_DEF dst, TEMP tmp);
1526   ins_cost(SVE_COST);
1527   format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated (sve) (may extend)" %}
1528   ins_encode %{
1529     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1530     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
1531                            $src1$$Register, as_FloatRegister($src2$$reg),
1532                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
1533   %}
1534   ins_pipe(pipe_slow);
1535 %}')dnl
1536 dnl
1537 dnl REDUCE_L_PREDICATE($1,        $2    )
1538 dnl REDUCE_L_PREDICATE(insn_name, op_name)
     dnl Emits rule reduce_<insn_name>L_masked: the long counterpart of
     dnl REDUCE_I_PREDICATE. For AddReductionVL the rule predicate is only
     dnl UseSVE > 0; for any other op name it also requires the vector element
     dnl type to be T_LONG. sve_reduce_integral is always called with T_LONG.
1539 define(`REDUCE_L_PREDICATE', `
1540 instruct reduce_$1L_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{
1541   ifelse($2, AddReductionVL,
1542        `predicate(UseSVE > 0);',
1543        `predicate(UseSVE > 0 &&
1544             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);')
1545   match(Set dst ($2 (Binary src1 src2) pg));
1546   effect(TEMP_DEF dst, TEMP tmp);
1547   ins_cost(SVE_COST);
1548   format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated (sve)" %}
1549   ins_encode %{
1550     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
1551                            $src1$$Register, as_FloatRegister($src2$$reg),
1552                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
1553   %}
1554   ins_pipe(pipe_slow);
1555 %}')dnl
1556 dnl
1557 dnl REDUCE_ADDF_PREDICATE($1,        $2,      $3,      $4  )
1558 dnl REDUCE_ADDF_PREDICATE(insn_name, op_name, reg_dst, size)
     dnl Emits rule reduce_<insn_name>_masked: matches <op_name> with inputs
     dnl (Binary src1_dst src2) and a governing predicate pg. There is no
     dnl vector-length check in the rule predicate; sve_fadda is issued under pg
     dnl with element size <size>, accumulating into src1_dst.
1559 define(`REDUCE_ADDF_PREDICATE', `
1560 instruct reduce_$1_masked($3 src1_dst, vReg src2, pRegGov pg) %{
1561   predicate(UseSVE > 0);
1562   match(Set src1_dst ($2 (Binary src1_dst src2) pg));
1563   ins_cost(SVE_COST);
1564   format %{ "sve_reduce_$1 $src1_dst, $pg, $src2\t# $1 reduction predicated (sve)" %}
1565   ins_encode %{
1566     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
1567                  as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
1568   %}
1569   ins_pipe(pipe_slow);
1570 %}')dnl
1571 dnl
1572 
1573 // vector add reduction
     dnl Each invocation below expands one reduction template into one instruct
     dnl rule. REDUCE_I, REDUCE_L, REDUCE_I_PARTIAL and REDUCE_L_PARTIAL are
     dnl defined earlier in this file. The and/or/xor groups pass the same ideal
     dnl op name (AndReductionV, OrReductionV, XorReductionV) to both the I and
     dnl the L template.
1574 REDUCE_I(add, AddReductionVI)
1575 REDUCE_L(add, AddReductionVL)
1576 REDUCE_ADDF(addF, AddReductionVF, vRegF, S)
1577 REDUCE_ADDF(addD, AddReductionVD, vRegD, D)
1578 REDUCE_I_PARTIAL(add, AddReductionVI)
1579 REDUCE_L_PARTIAL(add, AddReductionVL)
1580 REDUCE_ADDF_PARTIAL(addF, AddReductionVF, vRegF, S)
1581 REDUCE_ADDF_PARTIAL(addD, AddReductionVD, vRegD, D)
1582 
1583 // vector add reduction - predicated
1584 REDUCE_I_PREDICATE(add, AddReductionVI)
1585 REDUCE_L_PREDICATE(add, AddReductionVL)
1586 REDUCE_ADDF_PREDICATE(addF, AddReductionVF, vRegF, S)
1587 REDUCE_ADDF_PREDICATE(addD, AddReductionVD, vRegD, D)
1588 
1589 // vector and reduction
1590 REDUCE_I(and, AndReductionV)
1591 REDUCE_L(and, AndReductionV)
1592 REDUCE_I_PARTIAL(and, AndReductionV)
1593 REDUCE_L_PARTIAL(and, AndReductionV)
1594 
1595 // vector and reduction - predicated
1596 REDUCE_I_PREDICATE(and, AndReductionV)
1597 REDUCE_L_PREDICATE(and, AndReductionV)
1598 
1599 // vector or reduction
1600 REDUCE_I(or, OrReductionV)
1601 REDUCE_L(or, OrReductionV)
1602 REDUCE_I_PARTIAL(or, OrReductionV)
1603 REDUCE_L_PARTIAL(or, OrReductionV)
1604 
1605 // vector or reduction - predicated
1606 REDUCE_I_PREDICATE(or, OrReductionV)
1607 REDUCE_L_PREDICATE(or, OrReductionV)
1608 
1609 // vector xor reduction
1610 REDUCE_I(eor, XorReductionV)
1611 REDUCE_L(eor, XorReductionV)
1612 REDUCE_I_PARTIAL(eor, XorReductionV)
1613 REDUCE_L_PARTIAL(eor, XorReductionV)
1614 
1615 // vector xor reduction - predicated
1616 REDUCE_I_PREDICATE(eor, XorReductionV)
1617 REDUCE_L_PREDICATE(eor, XorReductionV)
1618 
1619 dnl
1620 dnl REDUCE_MAXMIN_I($1,        $2     )
1621 dnl REDUCE_MAXMIN_I(insn_name, op_name)
     dnl Emits rule reduce_<insn_name>I: min/max reduction producing an int.
     dnl The rule applies when the vector length in bytes equals MaxVectorSize and
     dnl the element type is integral but not T_LONG. The element type is read
     dnl from src2 at encode time and sve_reduce_integral runs under ptrue.
     dnl The rule declares KILL cr for the flags register.
1622 define(`REDUCE_MAXMIN_I', `
1623 instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
1624   predicate(UseSVE > 0 &&
1625             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1626             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1627             is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
1628   match(Set dst ($2 src1 src2));
1629   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
1630   ins_cost(SVE_COST);
1631   format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction (sve)" %}
1632   ins_encode %{
1633     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1634     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
1635                            $src1$$Register, as_FloatRegister($src2$$reg),
1636                            ptrue, as_FloatRegister($tmp$$reg));
1637   %}
1638   ins_pipe(pipe_slow);
1639 %}')dnl
1640 dnl
1641 dnl REDUCE_MAXMIN_L($1,        $2     )
1642 dnl REDUCE_MAXMIN_L(insn_name, op_name)
     dnl Emits rule reduce_<insn_name>L: min/max reduction producing a long.
     dnl The rule applies when the vector length in bytes equals MaxVectorSize and
     dnl the element type is T_LONG. sve_reduce_integral is called with T_LONG
     dnl under ptrue. The rule declares KILL cr for the flags register.
1643 define(`REDUCE_MAXMIN_L', `
1644 instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
1645   predicate(UseSVE > 0 &&
1646             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1647             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1648   match(Set dst ($2 src1 src2));
1649   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
1650   ins_cost(SVE_COST);
1651   format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction (sve)" %}
1652   ins_encode %{
1653     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
1654                            $src1$$Register, as_FloatRegister($src2$$reg),
1655                            ptrue, as_FloatRegister($tmp$$reg));
1656   %}
1657   ins_pipe(pipe_slow);
1658 %}')dnl
1659 dnl
1660 dnl REDUCE_MAXMIN_I_PARTIAL($1     , $2     )
1661 dnl REDUCE_MAXMIN_I_PARTIAL(min_max, op_name)
     dnl Emits rule reduce_<min_max>I_partial: like REDUCE_MAXMIN_I, but for
     dnl vectors whose length in bytes is smaller than MaxVectorSize.
     dnl The element type of src2 is converted to a register variant, which
     dnl sve_ptrue_lanecnt uses together with the lane count of src2 to set up
     dnl ptmp; sve_reduce_integral then runs under ptmp. Cost is 2 * SVE_COST.
1662 define(`REDUCE_MAXMIN_I_PARTIAL', `
1663 instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1664                              pRegGov ptmp, rFlagsReg cr) %{
1665   predicate(UseSVE > 0 &&
1666             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1667             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1668             is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
1669   match(Set dst ($2 src1 src2));
1670   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1671   ins_cost(2 * SVE_COST);
1672   format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction partial (sve)" %}
1673   ins_encode %{
1674     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1675     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1676     __ sve_ptrue_lanecnt(as_PRegister($ptmp$$reg), variant, Matcher::vector_length(this, $src2));
1677     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
1678                            $src1$$Register, as_FloatRegister($src2$$reg),
1679                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
1680   %}
1681   ins_pipe(pipe_slow);
1682 %}')dnl
1683 dnl
1684 dnl REDUCE_MAXMIN_L_PARTIAL($1     , $2     )
1685 dnl REDUCE_MAXMIN_L_PARTIAL(min_max, op_name)
     dnl Emits rule reduce_<min_max>L_partial: like REDUCE_MAXMIN_L, but for
     dnl vectors whose length in bytes is smaller than MaxVectorSize.
     dnl sve_ptrue_lanecnt sets up ptmp with the D size variant and the lane count
     dnl of src2; sve_reduce_integral then runs with T_LONG under ptmp.
     dnl Cost is 2 * SVE_COST.
1686 define(`REDUCE_MAXMIN_L_PARTIAL', `
1687 instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1688                              pRegGov ptmp, rFlagsReg cr) %{
1689   predicate(UseSVE > 0 &&
1690             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1691             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1692   match(Set dst ($2 src1 src2));
1693   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1694   ins_cost(2 * SVE_COST);
1695   format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction  partial (sve)" %}
1696   ins_encode %{
1697     __ sve_ptrue_lanecnt(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src2));
1698     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
1699                            $src1$$Register, as_FloatRegister($src2$$reg),
1700                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
1701   %}
1702   ins_pipe(pipe_slow);
1703 %}')dnl
1704 dnl
1705 dnl REDUCE_MAXMIN_I_PREDICATE($1     , $2     )
1706 dnl REDUCE_MAXMIN_I_PREDICATE(min_max, op_name)
     dnl Emits rule reduce_<min_max>I_masked: min/max reduction under a governing
     dnl predicate pg, producing an int. The rule applies to integral element
     dnl types other than T_LONG and has no vector-length check. The element type
     dnl is read from src2 at encode time and sve_reduce_integral runs under pg.
1707 define(`REDUCE_MAXMIN_I_PREDICATE', `
1708 instruct reduce_$1I_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp,
1709                            pRegGov pg, rFlagsReg cr) %{
1710   predicate(UseSVE > 0 &&
1711             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1712             is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
1713   match(Set dst ($2 (Binary src1 src2) pg));
1714   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
1715   ins_cost(SVE_COST);
1716   format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated (sve)" %}
1717   ins_encode %{
1718     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1719     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
1720                            $src1$$Register, as_FloatRegister($src2$$reg),
1721                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
1722   %}
1723   ins_pipe(pipe_slow);
1724 %}')dnl
1725 dnl
1726 dnl REDUCE_MAXMIN_L_PREDICATE($1     , $2     )
1727 dnl REDUCE_MAXMIN_L_PREDICATE(min_max, op_name)
     dnl Emits rule reduce_<min_max>L_masked: min/max reduction under a governing
     dnl predicate pg, producing a long. The rule applies only to T_LONG element
     dnl types and has no vector-length check. sve_reduce_integral is called with
     dnl T_LONG under pg.
1728 define(`REDUCE_MAXMIN_L_PREDICATE', `
1729 instruct reduce_$1L_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp,
1730                           pRegGov pg, rFlagsReg cr) %{
1731   predicate(UseSVE > 0 &&
1732             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1733   match(Set dst ($2 (Binary src1 src2) pg));
1734   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
1735   ins_cost(SVE_COST);
1736   format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated (sve)" %}
1737   ins_encode %{
1738     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
1739                            $src1$$Register, as_FloatRegister($src2$$reg),
1740                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
1741   %}
1742   ins_pipe(pipe_slow);
1743 %}')dnl
1744 dnl
1745 dnl REDUCE_FMINMAX($1,      $2,          $3,           $4,   $5         )
1746 dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
     dnl Emits rule reduce_<min_max><name_suffix>: floating-point min/max reduction
     dnl for vectors of <element_type> whose length in bytes equals MaxVectorSize.
     dnl The ideal op name is built by translit, which turns the leading m of
     dnl min or max into M and appends ReductionV (MinReductionV, MaxReductionV).
     dnl Encoding is two steps: sve_f<min_max>v reduces src2 into dst under ptrue,
     dnl then the scalar f<min_max>s or f<min_max>d (suffix is the lower-cased
     dnl size letter) combines dst with src1.
     dnl dst is TEMP_DEF because it is written before src1 is read.
1747 define(`REDUCE_FMINMAX', `
1748 instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
1749   predicate(UseSVE > 0 &&
1750             n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
1751             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1752   match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
1753   ins_cost(INSN_COST);
1754   effect(TEMP_DEF dst);
1755   format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# $1$2 reduction (sve)" %}
1756   ins_encode %{
1757     __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src2$$reg));
1758     __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1759   %}
1760   ins_pipe(pipe_slow);
1761 %}')dnl
1762 dnl
1763 dnl REDUCE_FMINMAX_PARTIAL($1,      $2,          $3,           $4,   $5         )
1764 dnl REDUCE_FMINMAX_PARTIAL(min_max, name_suffix, element_type, size, reg_src_dst)
     dnl Emits rule reduce_<min_max><name_suffix>_partial: like REDUCE_FMINMAX, but
     dnl for vectors whose length in bytes is smaller than MaxVectorSize.
     dnl sve_ptrue_lanecnt sets up ptmp from the lane count of src2, the vector
     dnl reduction runs under ptmp, and the scalar min/max then combines dst
     dnl with src1.
1765 define(`REDUCE_FMINMAX_PARTIAL', `
1766 instruct reduce_$1$2_partial($5 dst, $5 src1, vReg src2,
1767                              pRegGov ptmp, rFlagsReg cr) %{
1768   predicate(UseSVE > 0 &&
1769             n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
1770             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1771   match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
1772   ins_cost(INSN_COST);
1773   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
1774   format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# $1$2 reduction partial (sve)" %}
1775   ins_encode %{
1776     __ sve_ptrue_lanecnt(as_PRegister($ptmp$$reg), __ $4, Matcher::vector_length(this, $src2));
1777     __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1778     __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1779   %}
1780   ins_pipe(pipe_slow);
1781 %}')dnl
1782 dnl
1783 dnl REDUCE_FMINMAX_PREDICATE($1,      $2,          $3,           $4,   $5         )
1784 dnl REDUCE_FMINMAX_PREDICATE(min_max, name_suffix, element_type, size, reg_src_dst)
     dnl Emits rule reduce_<min_max><name_suffix>_masked: floating-point min/max
     dnl reduction under a governing predicate pg. The rule checks the element
     dnl type only; there is no vector-length check. The vector reduction runs
     dnl under pg and the scalar min/max then combines dst with src1.
1785 define(`REDUCE_FMINMAX_PREDICATE', `
1786 instruct reduce_$1$2_masked($5 dst, $5 src1, vReg src2, pRegGov pg) %{
1787   predicate(UseSVE > 0 &&
1788             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == $3);
1789   match(Set dst (translit($1, `m', `M')ReductionV (Binary src1 src2) pg));
1790   ins_cost(SVE_COST);
1791   effect(TEMP_DEF dst);
1792   format %{ "sve_reduce_$1$2 $dst, $src1, $pg, $src2\t# $1$2 reduction predicated (sve)" %}
1793   ins_encode %{
1794     __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
1795     __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1796   %}
1797   ins_pipe(pipe_slow);
1798 %}')dnl
1799 dnl
1800 // vector max reduction
     dnl Each invocation below expands one min/max reduction template into one
     dnl instruct rule. The integral templates take the ideal op name as their
     dnl second argument; the floating-point templates derive it from the first
     dnl argument and instead take the name suffix, element type, size variant and
     dnl scalar register class.
1801 REDUCE_MAXMIN_I(max, MaxReductionV)
1802 REDUCE_MAXMIN_L(max, MaxReductionV)
1803 REDUCE_MAXMIN_I_PARTIAL(max, MaxReductionV)
1804 REDUCE_MAXMIN_L_PARTIAL(max, MaxReductionV)
1805 REDUCE_FMINMAX(max, F, T_FLOAT,  S, vRegF)
1806 REDUCE_FMINMAX_PARTIAL(max, F, T_FLOAT,  S, vRegF)
1807 REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
1808 REDUCE_FMINMAX_PARTIAL(max, D, T_DOUBLE, D, vRegD)
1809 
1810 // vector max reduction - predicated
1811 REDUCE_MAXMIN_I_PREDICATE(max, MaxReductionV)
1812 REDUCE_MAXMIN_L_PREDICATE(max, MaxReductionV)
1813 REDUCE_FMINMAX_PREDICATE(max, F, T_FLOAT,  S, vRegF)
1814 REDUCE_FMINMAX_PREDICATE(max, D, T_DOUBLE, D, vRegD)
1815 
1816 // vector min reduction
1817 REDUCE_MAXMIN_I(min, MinReductionV)
1818 REDUCE_MAXMIN_L(min, MinReductionV)
1819 REDUCE_MAXMIN_I_PARTIAL(min, MinReductionV)
1820 REDUCE_MAXMIN_L_PARTIAL(min, MinReductionV)
1821 REDUCE_FMINMAX(min, F, T_FLOAT,  S, vRegF)
1822 REDUCE_FMINMAX_PARTIAL(min, F, T_FLOAT,  S, vRegF)
1823 REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
1824 REDUCE_FMINMAX_PARTIAL(min, D, T_DOUBLE, D, vRegD)
1825 
1826 // vector min reduction - predicated
1827 REDUCE_MAXMIN_I_PREDICATE(min, MinReductionV)
1828 REDUCE_MAXMIN_L_PREDICATE(min, MinReductionV)
1829 REDUCE_FMINMAX_PREDICATE(min, F, T_FLOAT,  S, vRegF)
1830 REDUCE_FMINMAX_PREDICATE(min, D, T_DOUBLE, D, vRegD)
1831 
1832 // vector Math.rint, floor, ceil
1833 
1834 instruct vroundD(vReg dst, vReg src, immI rmode) %{
     dnl Rule for RoundDoubleModeV on vectors with T_DOUBLE elements. The constant
     dnl rmode selects the instruction: rmode_rint uses sve_frintn, rmode_floor
     dnl uses sve_frintm and rmode_ceil uses sve_frintp, each with the D size
     dnl variant under ptrue. The switch has no default case, so any other rmode
     dnl value emits no instruction. No ins_cost is declared for this rule.
1835   predicate(UseSVE > 0 &&
1836             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
1837   match(Set dst (RoundDoubleModeV src rmode));
1838   format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
1839   ins_encode %{
1840     switch ($rmode$$constant) {
1841       case RoundDoubleModeNode::rmode_rint:
1842         __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
1843              ptrue, as_FloatRegister($src$$reg));
1844         break;
1845       case RoundDoubleModeNode::rmode_floor:
1846         __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
1847              ptrue, as_FloatRegister($src$$reg));
1848         break;
1849       case RoundDoubleModeNode::rmode_ceil:
1850         __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
1851              ptrue, as_FloatRegister($src$$reg));
1852         break;
1853     }
1854   %}
1855   ins_pipe(pipe_slow);
1856 %}
1857 define(`VECTOR_JAVA_FROUND', `
1858 instruct vround$1to$3($7 dst, $7 src, $7 tmp1, $7 tmp2, $7 tmp3, pRegGov ptmp)
1859 %{
1860   predicate(UseSVE > 0);
1861   match(Set dst (RoundV$1 src));
1862   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp);
1863   format %{ "sve_vround  $dst, $4, $src\t# round $1 to $3 vector" %}
1864   ins_encode %{
1865     BasicType bt = Matcher::vector_element_basic_type(this);
1866     int vlen = Matcher::vector_length_in_bytes(this);
1867     if (vlen > 16) {
1868       __ vector_round_sve(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1869                           as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
1870                           as_PRegister($ptmp$$reg), __ $4);
1871     } else {
1872       __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1873                            as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
1874                            as_FloatRegister($tmp3$$reg),
1875                            __ esize2arrangement(type2aelembytes(bt),
1876                               /*isQ*/ vlen == 16));
1877     }
1878   %}
1879   ins_pipe(pipe_class_default);
1880 %}')dnl           $1  $2  $3 $4 $5    $6    $7
     dnl VECTOR_JAVA_FROUND emits rule vround<$1>to<$3>, matching RoundV<$1>.
     dnl   $1 source element type letter, used in the rule name and the ideal op
     dnl   $3 result element type letter, used in the rule name and format text
     dnl   $4 SVE size variant, passed to vector_round_sve and shown in the format
     dnl   $7 register class of dst, src and the three vector temporaries
     dnl   $2, $5 and $6 are not referenced by the macro body.
     dnl Vectors longer than 16 bytes use vector_round_sve (tmp1, tmp2, ptmp).
     dnl Vectors of 16 bytes or fewer use vector_round_neon (tmp1, tmp2, tmp3) with
     dnl an arrangement derived from the element size; isQ is set when the
     dnl length is exactly 16 bytes.
1881 VECTOR_JAVA_FROUND(F, 8F,  I, S, 8,  INT, vReg)
1882 VECTOR_JAVA_FROUND(D, 4D,  L, D, 4, LONG, vReg)
1883 dnl
1884 dnl REPLICATE($1,        $2,      $3,      $4,   $5         )
1885 dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
     dnl Emits rule <insn_name>: matches <op_name> on a general-purpose register of
     dnl class <reg_src> and emits sve_dup with element size <size>.
     dnl min_vec_len ($5) is accepted but not referenced by the macro body.
1886 define(`REPLICATE', `
1887 instruct $1(vReg dst, $3 src) %{
1888   predicate(UseSVE > 0);
1889   match(Set dst ($2 src));
1890   ins_cost(SVE_COST);
1891   format %{ "sve_dup  $dst, $src\t# vector (sve) ($4)" %}
1892   ins_encode %{
1893     __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
1894   %}
1895   ins_pipe(pipe_slow);
1896 %}')dnl
1897 dnl
1898 dnl REPLICATE_IMM8($1,        $2,      $3,       $4,   $5         )
1899 dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
     dnl Emits rule <insn_name>: matches <op_name> on an immediate operand of type
     dnl <imm_type> and emits sve_dup with the constant value and element size
     dnl <size>. Which constants qualify is decided by the operand type alone; the
     dnl rule adds no range check of its own.
     dnl min_vec_len ($5) is accepted but not referenced by the macro body.
1900 define(`REPLICATE_IMM8', `
1901 instruct $1(vReg dst, $3 con) %{
1902   predicate(UseSVE > 0);
1903   match(Set dst ($2 con));
1904   ins_cost(SVE_COST);
1905   format %{ "sve_dup  $dst, $con\t# vector (sve) ($4)" %}
1906   ins_encode %{
1907     __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
1908   %}
1909   ins_pipe(pipe_slow);
1910 %}')dnl
1911 dnl
1912 dnl FREPLICATE($1,        $2,      $3,      $4,   $5         )
1913 dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
     dnl Emits rule <insn_name>: matches <op_name> on a floating-point register of
     dnl class <reg_src> and emits sve_cpy under ptrue with element size <size>.
     dnl The call sites pass a fifth argument (min_vec_len); the macro body does
     dnl not reference it.
1914 define(`FREPLICATE', `
1915 instruct $1(vReg dst, $3 src) %{
1916   predicate(UseSVE > 0);
1917   match(Set dst ($2 src));
1918   ins_cost(SVE_COST);
1919   format %{ "sve_cpy  $dst, $src\t# vector (sve) ($4)" %}
1920   ins_encode %{
1921     __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
1922          ptrue, as_FloatRegister($src$$reg));
1923   %}
1924   ins_pipe(pipe_slow);
1925 %}')dnl
1926 
1927 // vector replicate
     dnl Instantiates the replicate rules: from a general-purpose register, from an
     dnl 8-bit style immediate operand, and from a floating-point register.
     dnl The last argument of every invocation is not used by the templates.
1928 REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
1929 REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
1930 REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
1931 REPLICATE(replicateL, ReplicateL, iRegL,      D, 2)
1932 REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8,        B, 16)
1933 REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
1934 REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
1935 REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
1936 FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
1937 FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
1938 dnl
1939 dnl VSHIFT_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
1940 dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
     dnl Emits rule <insn_name>: shift by a vector of per-lane counts. dst is both
     dnl the value being shifted and the destination, and <insn> is issued under
     dnl ptrue with element size <size>.
     dnl min_vec_len ($4) is accepted but not referenced by the macro body.
1941 define(`VSHIFT_TRUE_PREDICATE', `
1942 instruct $1(vReg dst, vReg shift) %{
1943   predicate(UseSVE > 0);
1944   match(Set dst ($2 dst shift));
1945   ins_cost(SVE_COST);
1946   format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
1947   ins_encode %{
1948     __ $5(as_FloatRegister($dst$$reg), __ $3,
1949          ptrue, as_FloatRegister($shift$$reg));
1950   %}
1951   ins_pipe(pipe_slow);
1952 %}')dnl
1953 dnl
1954 dnl VSHIFT_IMM_UNPREDICATED($1,        $2,      $3,       $4,   $5,          $6  )
1955 dnl VSHIFT_IMM_UNPREDICATED(insn_name, op_name, op_name2, size, min_vec_len, insn)
     dnl Emits rule <insn_name>: shift of src by an immediate count, matching
     dnl <op_name> applied to src and (<op_name2> shift).
     dnl The ifelse blocks add special cases chosen at macro-expansion time from
     dnl the insn_name prefix and the size letter:
     dnl   vasr, vlsr          count 0 copies src to dst with sve_orr and returns
     dnl   vasr with B or H    counts of 8 (B) or 16 (H) and above are clamped to
     dnl                       7 or 15
     dnl   vlsl, vlsr with B   counts of 8 and above zero dst with sve_eor of src
     dnl                       with itself and return
     dnl   vlsl, vlsr with H   same, for counts of 16 and above
     dnl The remaining path issues <insn> with the possibly clamped count.
     dnl min_vec_len ($5) is accepted but not referenced by the macro body.
1956 define(`VSHIFT_IMM_UNPREDICATED', `
1957 instruct $1(vReg dst, vReg src, immI shift) %{
1958   predicate(UseSVE > 0);
1959   match(Set dst ($2 src ($3 shift)));
1960   ins_cost(SVE_COST);
1961   format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
1962   ins_encode %{
1963     int con = (int)$shift$$constant;dnl
1964 ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
1965     if (con == 0) {
1966       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1967            as_FloatRegister($src$$reg));
1968       return;
1969     }')dnl
1970 ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
1971     if (con >= 8) con = 7;')ifelse(eval(index(`$4', `H') == 0), 1, `
1972     if (con >= 16) con = 15;')')dnl
1973 ifelse(eval(index(`$1', `vlsl') == 0  || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
1974     if (con >= 8) {
1975       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1976            as_FloatRegister($src$$reg));
1977       return;
1978     }')ifelse(eval(index(`$4', `H') == 0), 1, `
1979     if (con >= 16) {
1980       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
1981            as_FloatRegister($src$$reg));
1982       return;
1983     }')')
1984     __ $6(as_FloatRegister($dst$$reg), __ $4,
1985          as_FloatRegister($src$$reg), con);
1986   %}
1987   ins_pipe(pipe_slow);
1988 %}')dnl
1989 dnl
1990 dnl VSHIFT_COUNT($1,        $2,   $3,          $4  )
1991 dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
     dnl Emits rule <insn_name>: matches both LShiftCntV and RShiftCntV and copies
     dnl the scalar count register into dst with sve_dup at element size <size>.
     dnl The rule predicate goes through ELEMENT_SHORT_CHAR(<type>, n), a macro
     dnl defined elsewhere in this file.
     dnl min_vec_len ($3) is accepted but not referenced by the macro body.
     dnl No ins_cost is declared for this rule.
1992 define(`VSHIFT_COUNT', `
1993 instruct $1(vReg dst, iRegIorL2I cnt) %{
1994   predicate(UseSVE > 0 &&
1995             ELEMENT_SHORT_CHAR($4, n));
1996   match(Set dst (LShiftCntV cnt));
1997   match(Set dst (RShiftCntV cnt));
1998   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
1999   ins_encode %{
2000     __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
2001   %}
2002   ins_pipe(pipe_slow);
2003 %}')dnl
2004 
2005 // vector shift
     dnl Instantiates the shift rules for every element size: shift by a vector of
     dnl counts, shift by an immediate, and the shift-count rules.
     dnl In the immediate forms both RShiftV* and URShiftV* are paired with
     dnl RShiftCntV, while LShiftV* is paired with LShiftCntV.
     dnl BINARY_OP_PREDICATE, used for the predicated forms, is defined elsewhere in
     dnl this file.
2006 VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB,  B, 16, sve_asr)
2007 VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS,  H,  8, sve_asr)
2008 VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI,  S,  4, sve_asr)
2009 VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL,  D,  2, sve_asr)
2010 VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB,  B, 16, sve_lsl)
2011 VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS,  H,  8, sve_lsl)
2012 VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI,  S,  4, sve_lsl)
2013 VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL,  D,  2, sve_lsl)
2014 VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
2015 VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H,  8, sve_lsr)
2016 VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S,  4, sve_lsr)
2017 VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D,  2, sve_lsr)
2018 VSHIFT_IMM_UNPREDICATED(vasrB_imm, RShiftVB,  RShiftCntV, B, 16, sve_asr)
2019 VSHIFT_IMM_UNPREDICATED(vasrS_imm, RShiftVS,  RShiftCntV, H,  8, sve_asr)
2020 VSHIFT_IMM_UNPREDICATED(vasrI_imm, RShiftVI,  RShiftCntV, S,  4, sve_asr)
2021 VSHIFT_IMM_UNPREDICATED(vasrL_imm, RShiftVL,  RShiftCntV, D,  2, sve_asr)
2022 VSHIFT_IMM_UNPREDICATED(vlsrB_imm, URShiftVB, RShiftCntV, B, 16, sve_lsr)
2023 VSHIFT_IMM_UNPREDICATED(vlsrS_imm, URShiftVS, RShiftCntV, H,  8, sve_lsr)
2024 VSHIFT_IMM_UNPREDICATED(vlsrI_imm, URShiftVI, RShiftCntV, S,  4, sve_lsr)
2025 VSHIFT_IMM_UNPREDICATED(vlsrL_imm, URShiftVL, RShiftCntV, D,  2, sve_lsr)
2026 VSHIFT_IMM_UNPREDICATED(vlslB_imm, LShiftVB,  LShiftCntV, B, 16, sve_lsl)
2027 VSHIFT_IMM_UNPREDICATED(vlslS_imm, LShiftVS,  LShiftCntV, H,  8, sve_lsl)
2028 VSHIFT_IMM_UNPREDICATED(vlslI_imm, LShiftVI,  LShiftCntV, S,  4, sve_lsl)
2029 VSHIFT_IMM_UNPREDICATED(vlslL_imm, LShiftVL,  LShiftCntV, D,  2, sve_lsl)
2030 VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
2031 VSHIFT_COUNT(vshiftcntS, H,  8, T_SHORT)
2032 VSHIFT_COUNT(vshiftcntI, S,  4, T_INT)
2033 VSHIFT_COUNT(vshiftcntL, D,  2, T_LONG)
2034 
2035 // vector shift - predicated
2036 BINARY_OP_PREDICATE(vasrB, RShiftVB,  B, sve_asr)
2037 BINARY_OP_PREDICATE(vasrS, RShiftVS,  H, sve_asr)
2038 BINARY_OP_PREDICATE(vasrI, RShiftVI,  S, sve_asr)
2039 BINARY_OP_PREDICATE(vasrL, RShiftVL,  D, sve_asr)
2040 BINARY_OP_PREDICATE(vlslB, LShiftVB,  B, sve_lsl)
2041 BINARY_OP_PREDICATE(vlslS, LShiftVS,  H, sve_lsl)
2042 BINARY_OP_PREDICATE(vlslI, LShiftVI,  S, sve_lsl)
2043 BINARY_OP_PREDICATE(vlslL, LShiftVL,  D, sve_lsl)
2044 BINARY_OP_PREDICATE(vlsrB, URShiftVB, B, sve_lsr)
2045 BINARY_OP_PREDICATE(vlsrS, URShiftVS, H, sve_lsr)
2046 BINARY_OP_PREDICATE(vlsrI, URShiftVI, S, sve_lsr)
2047 BINARY_OP_PREDICATE(vlsrL, URShiftVL, D, sve_lsr)
2048 dnl
2049 dnl VSHIFT_IMM_PREDICATED($1,        $2,      $3,       $4,   $5,   $6  )
2050 dnl VSHIFT_IMM_PREDICATED(insn_name, op_name, op_name2, type, size, insn)
     dnl Emits rule <insn_name>_imm_masked: shift of dst_src by an immediate count
     dnl under a governing predicate pg; dst_src is both source and destination.
     dnl type ($4) is the size variant letter handed to the assembler, and
     dnl size ($5) is the exclusive upper bound the assert places on the count
     dnl (8, 16, 32 or 64 at the call sites).
     dnl The assert accepts a count of 0 only when insn_name starts with vlsl; the
     dnl other rules require a count greater than 0.
2051 define(`VSHIFT_IMM_PREDICATED', `
2052 instruct $1_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
2053   predicate(UseSVE > 0);
2054   match(Set dst_src ($2 (Binary dst_src ($3 shift)) pg));
2055   ins_cost(SVE_COST);
2056   format %{ "$6 $dst_src, $pg, $dst_src, $shift\t# vector (sve) ($4)" %}
2057   ins_encode %{
2058     int con = (int)$shift$$constant;
2059     assert(con ifelse(index(`$1', `vlsl'), 0, `>=', `>') 0 && con < $5, "invalid shift immediate");
2060     __ $6(as_FloatRegister($dst_src$$reg), __ $4, as_PRegister($pg$$reg), con);
2061   %}
2062   ins_pipe(pipe_slow);
2063 %}')dnl
2064 dnl
     dnl Instantiates the predicated immediate shift rules, then the square root
     dnl and subtract rules in their unpredicated and predicated forms.
     dnl The fifth argument of VSHIFT_IMM_PREDICATED is the bound checked by the
     dnl assert in that template. UNARY_OP_TRUE_PREDICATE, UNARY_OP_PREDICATE,
     dnl BINARY_OP_UNPREDICATE and BINARY_OP_PREDICATE are defined elsewhere in this
     dnl file.
2065 VSHIFT_IMM_PREDICATED(vasrB, RShiftVB,  RShiftCntV, B, 8,  sve_asr)
2066 VSHIFT_IMM_PREDICATED(vasrS, RShiftVS,  RShiftCntV, H, 16, sve_asr)
2067 VSHIFT_IMM_PREDICATED(vasrI, RShiftVI,  RShiftCntV, S, 32, sve_asr)
2068 VSHIFT_IMM_PREDICATED(vasrL, RShiftVL,  RShiftCntV, D, 64, sve_asr)
2069 VSHIFT_IMM_PREDICATED(vlsrB, URShiftVB, RShiftCntV, B, 8,  sve_lsr)
2070 VSHIFT_IMM_PREDICATED(vlsrS, URShiftVS, RShiftCntV, H, 16, sve_lsr)
2071 VSHIFT_IMM_PREDICATED(vlsrI, URShiftVI, RShiftCntV, S, 32, sve_lsr)
2072 VSHIFT_IMM_PREDICATED(vlsrL, URShiftVL, RShiftCntV, D, 64, sve_lsr)
2073 VSHIFT_IMM_PREDICATED(vlslB, LShiftVB,  LShiftCntV, B, 8,  sve_lsl)
2074 VSHIFT_IMM_PREDICATED(vlslS, LShiftVS,  LShiftCntV, H, 16, sve_lsl)
2075 VSHIFT_IMM_PREDICATED(vlslI, LShiftVI,  LShiftCntV, S, 32, sve_lsl)
2076 VSHIFT_IMM_PREDICATED(vlslL, LShiftVL,  LShiftCntV, D, 64, sve_lsl)
2077 
2078 // vector sqrt
2079 UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, sve_fsqrt)
2080 UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, sve_fsqrt)
2081 
2082 // vector sqrt - predicated
2083 UNARY_OP_PREDICATE(vsqrtF, SqrtVF, S, sve_fsqrt)
2084 UNARY_OP_PREDICATE(vsqrtD, SqrtVD, D, sve_fsqrt)
2085 
2086 // vector sub
2087 BINARY_OP_UNPREDICATE(vsubB, SubVB, B, 16, sve_sub)
2088 BINARY_OP_UNPREDICATE(vsubS, SubVS, H, 8, sve_sub)
2089 BINARY_OP_UNPREDICATE(vsubI, SubVI, S, 4, sve_sub)
2090 BINARY_OP_UNPREDICATE(vsubL, SubVL, D, 2, sve_sub)
2091 BINARY_OP_UNPREDICATE(vsubF, SubVF, S, 4, sve_fsub)
2092 BINARY_OP_UNPREDICATE(vsubD, SubVD, D, 2, sve_fsub)
2093 
2094 // vector sub - predicated
2095 BINARY_OP_PREDICATE(vsubB, SubVB, B, sve_sub)
2096 BINARY_OP_PREDICATE(vsubS, SubVS, H, sve_sub)
2097 BINARY_OP_PREDICATE(vsubI, SubVI, S, sve_sub)
2098 BINARY_OP_PREDICATE(vsubL, SubVL, D, sve_sub)
2099 BINARY_OP_PREDICATE(vsubF, SubVF, S, sve_fsub)
2100 BINARY_OP_PREDICATE(vsubD, SubVD, D, sve_fsub)
2101 
2102 // ------------------------------ Vector mask cast --------------------------
2103 
2104 instruct vmaskcast(pRegGov dst_src) %{
     dnl VectorMaskCast where input and result have the same lane count and the
     dnl same length in bytes. The predicate register is reused in place, so the
     dnl rule emits no code and has cost 0.
2105   predicate(UseSVE > 0 &&
2106             n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
2107             n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
2108   match(Set dst_src (VectorMaskCast dst_src));
2109   ins_cost(0);
2110   format %{ "vmaskcast $dst_src\t# empty (sve)" %}
2111   ins_encode %{
2112     // empty
2113   %}
2114   ins_pipe(pipe_class_empty);
2115 %}
2116 
2117 instruct vmaskcast_extend(pRegGov dst, pReg src)
     dnl VectorMaskCast where the result vector is 2, 4 or 8 times as long in bytes
     dnl as the input vector. Both byte lengths are passed to the
     dnl sve_vmaskcast_extend helper, which produces dst from src.
2118 %{
2119   predicate(UseSVE > 0 &&
2120             (Matcher::vector_length_in_bytes(n) == 2 * Matcher::vector_length_in_bytes(n->in(1)) ||
2121              Matcher::vector_length_in_bytes(n) == 4 * Matcher::vector_length_in_bytes(n->in(1)) ||
2122              Matcher::vector_length_in_bytes(n) == 8 * Matcher::vector_length_in_bytes(n->in(1))));
2123   match(Set dst (VectorMaskCast src));
2124   ins_cost(SVE_COST * 3);
2125   format %{ "sve_vmaskcast_extend  $dst, $src\t# extend predicate $src" %}
2126   ins_encode %{
2127     __ sve_vmaskcast_extend(as_PRegister($dst$$reg), as_PRegister($src$$reg),
2128                             Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src));
2129   %}
2130   ins_pipe(pipe_slow);
2131 %}
2132 
2133 instruct vmaskcast_narrow(pRegGov dst, pReg src)
     dnl VectorMaskCast where the input vector is 2, 4 or 8 times as long in bytes
     dnl as the result vector. Both byte lengths are passed to the
     dnl sve_vmaskcast_narrow helper, which produces dst from src.
2134 %{
2135   predicate(UseSVE > 0 &&
2136             (Matcher::vector_length_in_bytes(n) * 2 == Matcher::vector_length_in_bytes(n->in(1)) ||
2137              Matcher::vector_length_in_bytes(n) * 4 == Matcher::vector_length_in_bytes(n->in(1)) ||
2138              Matcher::vector_length_in_bytes(n) * 8 == Matcher::vector_length_in_bytes(n->in(1))));
2139   match(Set dst (VectorMaskCast src));
2140   ins_cost(SVE_COST * 3);
2141   format %{ "sve_vmaskcast_narrow  $dst, $src\t# narrow predicate $src" %}
2142   ins_encode %{
2143     __ sve_vmaskcast_narrow(as_PRegister($dst$$reg), as_PRegister($src$$reg),
2144                             Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src));
2145   %}
2146   ins_pipe(pipe_slow);
2147 %}
2148 dnl
2149 
2150 // ------------------------------ Vector cast -------------------------------
2151 dnl
     dnl VECTOR_CAST_X2X($1,        $2,      $3,   $4,   $5         )
     dnl VECTOR_CAST_X2X(from_type, to_type, insn, size, name_suffix)
     dnl Emits rule vcvt<from_type>to<to_type>: matches VectorCast<from_type>2X when
     dnl the result element type is T_<TYPE2DATATYPE(to_type)>.
     dnl A single sve_<insn> is issued under ptrue, with <size> used as the size
     dnl variant of both the destination and the source.
     dnl name_suffix only appears in the format text.
2152 dnl
2153 define(`VECTOR_CAST_X2X', `
2154 instruct vcvt$1to$2`'(vReg dst, vReg src)
2155 %{
2156   predicate(UseSVE > 0 &&
2157             n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
2158   match(Set dst (VectorCast$1`'2X src));
2159   ins_cost(SVE_COST);
2160   format %{ "sve_vectorcast_$5  $dst, $src\t# convert $1 to $2 vector" %}
2161   ins_encode %{
2162     __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
2163   %}
2164   ins_pipe(pipe_slow);
2165 %}')dnl
2166 
2167 dnl
2168 dnl Start of vector cast rules
2169 dnl
2170 instruct vcvtBtoX_extend(vReg dst, vReg src)
     dnl VectorCastB2X for any result element type. The byte lanes are widened to
     dnl the size variant of the result element type with sve_vector_extend.
     dnl When the result type is T_FLOAT or T_DOUBLE, the widened integers are then
     dnl converted in place with sve_scvtf under ptrue.
2171 %{
2172   predicate(UseSVE > 0);
2173   match(Set dst (VectorCastB2X src));
2174   ins_cost(2 * SVE_COST);
2175   format %{ "sve_vectorcast_b2x  $dst, $src\t# convert B to X vector (extend)" %}
2176   ins_encode %{
2177     BasicType to_bt = Matcher::vector_element_basic_type(this);
2178     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
2179     __ sve_vector_extend(as_FloatRegister($dst$$reg), to_size, as_FloatRegister($src$$reg), __ B);
2180     if (to_bt == T_FLOAT || to_bt == T_DOUBLE) {
2181       __ sve_scvtf(as_FloatRegister($dst$$reg), to_size, ptrue, as_FloatRegister($dst$$reg), to_size);
2182     }
2183   %}
2184   ins_pipe(pipe_slow);
2185 %}
2186 
// VectorCastS2X with a byte result: narrow short (H) lanes to byte (B) lanes.
instruct vcvtStoB(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  effect(TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_s2b  $dst, $src\t# convert H to B vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Scratch vector handed to the narrowing helper.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    __ sve_vector_narrow(dst_reg, __ B, src_reg, __ H, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
2201 
// VectorCastS2X to any element type wider than two bytes: widen the short
// lanes, then convert to floating point for float/double destinations.
instruct vcvtStoX_extend(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) > 2);
  match(Set dst (VectorCastS2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_s2x  $dst, $src\t# convert H to X vector (extend)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant dst_size = __ elemType_to_regVariant(dst_bt);

    // Step 1: widen every short lane to the destination lane width.
    __ sve_vector_extend(dst_reg, dst_size, src_reg, __ H);

    // Step 2: floating-point destinations get an in-place conversion.
    const bool to_fp = (dst_bt == T_FLOAT) || (dst_bt == T_DOUBLE);
    if (to_fp) {
      __ sve_scvtf(dst_reg, dst_size, ptrue, dst_reg, dst_size);
    }
  %}
  ins_pipe(pipe_slow);
%}
2219 
// VectorCastI2X with a byte result: narrow int (S) lanes to byte (B) lanes.
instruct vcvtItoB(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastI2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_i2b  $dst, $src\t# convert I to B vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Scratch vector handed to the narrowing helper.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    __ sve_vector_narrow(dst_reg, __ B, src_reg, __ S, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
2234 
// VectorCastI2X with a short result: narrow int (S) lanes to short (H) lanes.
instruct vcvtItoS(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (VectorCastI2X src));
  effect(TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_i2s $dst, $src\t# convert I to H vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Scratch vector handed to the narrowing helper.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    __ sve_vector_narrow(dst_reg, __ H, src_reg, __ S, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
2249 
// VectorCastI2X with a long result: widen int (S) lanes to long (D) lanes.
instruct vcvtItoL(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorCastI2X src));
  ins_cost(SVE_COST);
  format %{ "sve_vectorcast_i2l  $dst, $src\t# convert I to L vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sve_vector_extend(dst_reg, __ D, src_reg, __ S);
  %}
  ins_pipe(pipe_slow);
%}
2262 dnl
2263 dnl vcvtItoF
// vcvtItoF: int to float cast, a single sve_scvtf on S lanes.
VECTOR_CAST_X2X(I, F, scvtf, S, i2f)
2265 
// VectorCastI2X with a double result: unpack the low int lanes into D-sized
// lanes, then convert those lanes to double in place.
instruct vcvtItoD(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_i2d  $dst, $src\t# convert I to D vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Widen first, so that the conversion works on D lanes only.
    __ sve_sunpklo(dst_reg, __ D, src_reg);
    __ sve_scvtf(dst_reg, __ D, ptrue, dst_reg, __ D);
  %}
  ins_pipe(pipe_slow);
%}
2279 
// VectorCastL2X to an integral element type: narrow long (D) lanes down to
// the lane size of the destination element type.
instruct vcvtLtoX_narrow(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastL2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_l2x  $dst, $src\t# convert L to B/H/S vector (narrow)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Scratch vector handed to the narrowing helper.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // The destination lane size follows from the element type of this node.
    Assembler::SIMD_RegVariant dst_size =
        __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
    __ sve_vector_narrow(dst_reg, dst_size, src_reg, __ D, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
2295 
// VectorCastL2X with a float result: convert each long lane to float, then
// pack the results from D-sized lanes into S-sized lanes.
instruct vcvtLtoF(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastL2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_l2f  $dst, $src\t# convert L to F vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Scratch vector handed to the narrowing helper.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // Convert first (D source lanes, S results), then narrow dst in place.
    __ sve_scvtf(dst_reg, __ S, ptrue, src_reg, __ D);
    __ sve_vector_narrow(dst_reg, __ S, dst_reg, __ D, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
2312 dnl
2313 dnl vcvtLtoD
// vcvtLtoD: long to double cast, a single sve_scvtf on D lanes.
VECTOR_CAST_X2X(L, D, scvtf, D, l2d)
2315 
// VectorCastF2X with a byte or short result: convert each float lane to an
// integer in place, then narrow the S-sized lanes to the destination size.
instruct vcvtFtoX_narrow(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT));
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_f2x  $dst, $src\t# convert F to B/H vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Scratch vector handed to the narrowing helper.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // The destination lane size follows from the element type of this node.
    Assembler::SIMD_RegVariant dst_size =
        __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
    __ sve_fcvtzs(dst_reg, __ S, ptrue, src_reg, __ S);
    __ sve_vector_narrow(dst_reg, dst_size, dst_reg, __ S, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
2334 
// VectorCastF2X with an int result: same lane width on both sides, so one
// sve_fcvtzs on S lanes does the whole cast.
instruct vcvtFtoI(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (VectorCastF2X src));
  ins_cost(SVE_COST);
  format %{ "sve_vectorcast_f2x  $dst, $src\t# convert F to I vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sve_fcvtzs(dst_reg, __ S, ptrue, src_reg, __ S);
  %}
  ins_pipe(pipe_slow);
%}
2347 
// VectorCastF2X with a long result: unpack the low S lanes into D-sized
// lanes, then convert them in place (S source lanes, D results).
instruct vcvtFtoL(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorCastF2X src));
  ins_cost(SVE_COST * 2);
  format %{ "sve_vectorcast_f2x  $dst, $src\t# convert F to L vector" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sve_sunpklo(dst_reg, __ D, src_reg);
    __ sve_fcvtzs(dst_reg, __ D, ptrue, dst_reg, __ S);
  %}
  ins_pipe(pipe_slow);
%}
2361 
// VectorCastF2X with a double result: widen the float lanes into D-sized
// lanes, then convert them to double in place.
instruct vcvtFtoD(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  ins_cost(2 * SVE_COST);
  // The listing shows the real source operand (it used to print dst twice).
  format %{ "sve_vectorcast_f2d  $dst, $src\t# convert F to D vector" %}
  ins_encode %{
    // Step 1: place each float in a D-sized lane of dst.
    __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S);
    // Step 2: convert in place, S source lanes to D results.
    __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
  %}
  ins_pipe(pipe_slow);
%}
2375 
// VectorCastD2X with a byte, short or int result: convert each double lane
// to an integer, then narrow the D-sized lanes to the destination size.
instruct vcvtDtoX_narrow(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorCastD2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_d2x  $dst, $src\t# convert D to X vector (narrow)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Scratch vector handed to the narrowing helper.
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    // The destination lane size follows from the element type of this node.
    Assembler::SIMD_RegVariant dst_size =
        __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
    // Convert first (D source lanes, S results), then narrow dst in place.
    __ sve_fcvtzs(dst_reg, __ S, ptrue, src_reg, __ D);
    __ sve_vector_narrow(dst_reg, dst_size, dst_reg, __ D, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
2395 dnl
2396 dnl vcvtDtoL
// vcvtDtoL: double to long cast, a single sve_fcvtzs on D lanes.
VECTOR_CAST_X2X(D, L, fcvtzs, D, d2l)
2398 
// VectorCastD2X with a float result: convert each double lane to float, then
// pack the results from D-sized lanes into S-sized lanes.
instruct vcvtDtoF(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  // The listing shows the real source operand, in the same shape as the other
  // cast rules (it used to print "$dst, S, $dst").
  format %{ "sve_vectorcast_d2f  $dst, $src\t# convert D to F vector" %}
  ins_encode %{
    // Step 1: convert, D source lanes to S results.
    __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
    // Step 2: narrow dst in place; tmp is scratch for the helper.
    __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
                         as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2414 
2415 dnl
2416 dnl
2417 // ------------------------------ Vector extract ---------------------------------
2418 dnl
define(`VECTOR_EXTRACT_I', `
// Extract lane $idx of a $1 vector into a general register. This rule only
// matches constant lane numbers >= $2; smaller ones are left to extract$1_LT$2.
instruct extract$1`'($3 dst, vReg src, immI idx, vReg vtmp)
%{
  predicate(UseSVE > 0 && n->in(2)->get_int() >= $2);
  match(Set dst (Extract$1 src idx));
  effect(TEMP vtmp);
  ins_cost(2 * SVE_COST);
  format %{ "sve_extract_integral $dst, $4, $src, $idx\t# extract from vector($1)" %}
  ins_encode %{
    // The is_signed argument is false for long vectors and true for all
    // narrower element types; vtmp is scratch for the helper.
    __ sve_extract_integral(as_Register($dst$$reg), __ $4, as_FloatRegister($src$$reg),
                            (int)($idx$$constant), /* is_signed */ ifelse($1, L, false, true), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl              $1 $2  $3         $4
VECTOR_EXTRACT_I(B, 16, iRegINoSp, B)
VECTOR_EXTRACT_I(S, 8,  iRegINoSp, H)
VECTOR_EXTRACT_I(I, 4,  iRegINoSp, S)
VECTOR_EXTRACT_I(L, 2,  iRegLNoSp, D)
2438 dnl
define(`VECTOR_EXTRACT_I_LT', `
// Extract lane $idx of a $1 vector into a general register when the constant
// lane number is below $2. A single element move is emitted: umov for D
// lanes, smov for the narrower ones.
instruct extract$1_LT$2`'($3 dst, vReg src, immI idx)
%{
  predicate(UseSVE > 0 && n->in(2)->get_int() < $2);
  match(Set dst (Extract$1 src idx));
  ins_cost(INSN_COST);
  format %{ "ifelse($4, D, umov, smov) $dst, $4, $src, $idx\t# extract from vector($1)" %}
  ins_encode %{
    // No scratch register is needed on this path.
    __ ifelse($4, D, umov, smov)(as_Register($dst$$reg), as_FloatRegister($src$$reg), __ $4, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl                 $1  $2  $3         $4
VECTOR_EXTRACT_I_LT(B,  16, iRegINoSp, B)
VECTOR_EXTRACT_I_LT(S,  8,  iRegINoSp, H)
VECTOR_EXTRACT_I_LT(I,  4,  iRegINoSp, S)
VECTOR_EXTRACT_I_LT(L,  2,  iRegLNoSp, D)
2456 
// ExtractF: copy the float lane selected by the constant idx into a scalar
// float register. Three code shapes are used depending on the lane number.
instruct extractF(vRegF dst, vReg src, immI idx)
%{
  predicate(UseSVE > 0);
  match(Set dst (ExtractF src idx));
  ins_cost(2 * SVE_COST);
  format %{ "sve_extract_f $dst, S, $src, $idx\t# extract from vector(F)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int lane = (int)($idx$$constant);

    if (lane == 0) {
      // Lane 0: a plain scalar move, skipped when both registers coincide.
      if (dst_reg != src_reg) {
        __ fmovs(dst_reg, src_reg);
      }
    } else if (lane < 4) {
      // Lanes 1..3: element insert from the source lane into lane 0 of dst.
      __ ins(dst_reg, __ S, src_reg, 0, lane);
    } else {
      // Higher lanes: copy the vector, then sve_ext with a byte offset of
      // lane * 4 (four bytes per float lane).
      __ sve_orr(dst_reg, src_reg, src_reg);
      __ sve_ext(dst_reg, dst_reg, lane << 2);
    }
  %}
  ins_pipe(pipe_slow);
%}
2477 
// ExtractD: copy the double lane selected by the constant idx into a scalar
// double register. Three code shapes are used depending on the lane number.
instruct extractD(vRegD dst, vReg src, immI idx)
%{
  predicate(UseSVE > 0);
  match(Set dst (ExtractD src idx));
  ins_cost(2 * SVE_COST);
  format %{ "sve_extract_d $dst, D, $src, $idx\t# extract from vector(D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int lane = (int)($idx$$constant);

    if (lane == 0) {
      // Lane 0: a plain scalar move, skipped when both registers coincide.
      if (dst_reg != src_reg) {
        __ fmovd(dst_reg, src_reg);
      }
    } else if (lane == 1) {
      // Lane 1: element insert from source lane 1 into lane 0 of dst.
      __ ins(dst_reg, __ D, src_reg, 0, 1);
    } else {
      // Higher lanes: copy the vector, then sve_ext with a byte offset of
      // lane * 8 (eight bytes per double lane).
      __ sve_orr(dst_reg, src_reg, src_reg);
      __ sve_ext(dst_reg, dst_reg, lane << 3);
    }
  %}
  ins_pipe(pipe_slow);
%}
2498 
2499 // ------------------------------- VectorTest ----------------------------------
2500 
// VectorTest, alltrue flavour (node predicate is BoolTest::overflow).
// src2 is expected to be the all-true mask (see the format text): the two
// predicates are xor-ed into ptmp and dst is set from the EQ condition.
instruct vtest_alltrue(iRegINoSp dst, pRegGov src1, pRegGov src2, pReg ptmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set dst (VectorTest src1 src2));
  effect(TEMP ptmp, KILL cr);
  ins_cost(SVE_COST);
  // Lines are joined with "\n\t" like every other multi-line format in this
  // file, so the second instruction lines up in the listing.
  format %{ "sve_eors $ptmp, $src1, $src2\t# $src2 is all true mask\n\t"
            "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %}
  ins_encode %{
    // The xor result itself is not needed afterwards; csetw consumes the
    // condition flags (cr is declared KILL for that reason).
    __ sve_eors(as_PRegister($ptmp$$reg), ptrue,
                as_PRegister($src1$$reg), as_PRegister($src2$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
2517 
// VectorTest, anytrue flavour (node predicate is BoolTest::ne): test the
// src1 predicate under ptrue and set dst from the NE condition.
instruct vtest_anytrue(iRegINoSp dst, pRegGov src1, pRegGov src2, rFlagsReg cr)
%{
  predicate(UseSVE > 0 &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set dst (VectorTest src1 src2));
  effect(KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_ptest $src1\n\t"
            "csetw $dst, NE\t# VectorTest (sve) - anytrue" %}
  ins_encode %{
    // Only src1 takes part in the test; src2 is matched but unused on sve.
    PRegister mask = as_PRegister($src1$$reg);
    Register result = as_Register($dst$$reg);
    __ sve_ptest(ptrue, mask);
    __ csetw(result, Assembler::NE);
  %}
  ins_pipe(pipe_slow);
%}
2534 dnl
2535 // ------------------------------ Vector insert ---------------------------------
2536 
// VectorInsert of a byte/short/int value into a 64-bit or 128-bit vector:
// copy src to dst when they differ, then move val into lane idx of dst.
instruct insertI_le128bits(vReg dst, vReg src, iRegIorL2I val, immI idx) %{
  predicate(UseSVE > 0 &&
            (Matcher::vector_length_in_bytes(n) == 8 || Matcher::vector_length_in_bytes(n) == 16) &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_INT));
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T8/16B, $src, $src\n\t"
            "mov    $dst, B/H/S, $idx, $val\t# insertI into vector(64/128bits)" %}
  ins_encode %{
    // Element type of the result; looked up once and reused for the lane size.
    BasicType bt = Matcher::vector_element_basic_type(this);
    // The copy is skipped when the allocator already put src in dst.
    if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
      __ orr(as_FloatRegister($dst$$reg), Matcher::vector_length_in_bytes(this) == 8 ? __ T8B : __ T16B,
             as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
    }
    __ mov(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
           $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
2558 
// VectorInsert of a byte/short/int value into a vector longer than 128 bits
// when the constant lane number is below 32. A one-lane predicate is built by
// comparing an index vector against an immediate, then val is copied under it.
instruct insertI_small_index(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg vtmp, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 && n->in(2)->get_int() < 32 &&
            Matcher::vector_length_in_bytes(n) > 16 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP pgtmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "sve_index $vtmp, -16, 1\t# (B/H/S)\n\t"
            "sve_cmpeq $pgtmp, $vtmp, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister iota_reg = as_FloatRegister($vtmp$$reg);
    PRegister lane_mask = as_PRegister($pgtmp$$reg);
    Assembler::SIMD_RegVariant size =
        __ elemType_to_regVariant(Matcher::vector_element_basic_type(this, $src));
    __ block_comment("insert into vector (B/H/S) {");
      // Lane numbers biased by -16, so that the compare immediate idx - 16
      // falls into [-16, 15] for every idx in [0, 31].
      __ sve_index(iota_reg, size, -16, 1);
      __ sve_cmp(Assembler::EQ, lane_mask, size, ptrue,
                 iota_reg, (int)($idx$$constant) - 16);
      // Start from a copy of src unless dst already is src.
      if (dst_reg != src_reg) {
        __ sve_orr(dst_reg, src_reg, src_reg);
      }
      // Copy val into the lanes selected by the predicate.
      __ sve_cpy(dst_reg, size, lane_mask, $val$$Register);
    __ block_comment("} insert into vector (B/H/S)");
  %}
  ins_pipe(pipe_slow);
%}
2587 
// VectorInsert of a byte/short/int value into a vector longer than 128 bits
// when the constant lane number is 32 or more. The one-lane predicate comes
// from comparing an index vector with a broadcast of idx.
instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg vtmp1, vReg vtmp2, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 && n->in(2)->get_int() >= 32 &&
            Matcher::vector_length_in_bytes(n) > 16 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp1, TEMP vtmp2, TEMP pgtmp, KILL cr);
  ins_cost(5 * SVE_COST);
  format %{ "sve_index $vtmp1, 0, 1\t# (B/H/S)\n\t"
            "sve_dup $vtmp2, $idx\t# (B/H/S)\n\t"
            "sve_cmpeq $pgtmp, $vtmp1, $vtmp2\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister iota_reg = as_FloatRegister($vtmp1$$reg);
    FloatRegister bcast_reg = as_FloatRegister($vtmp2$$reg);
    PRegister lane_mask = as_PRegister($pgtmp$$reg);
    Assembler::SIMD_RegVariant size =
        __ elemType_to_regVariant(Matcher::vector_element_basic_type(this, $src));
    __ block_comment("insert into vector (B/H/S) {");
      // Lane numbers 0, 1, 2, ... compared against idx in every lane.
      __ sve_index(iota_reg, size, 0, 1);
      __ sve_dup(bcast_reg, size, (int)($idx$$constant));
      __ sve_cmp(Assembler::EQ, lane_mask, size, ptrue, iota_reg, bcast_reg);
      // Start from a copy of src unless dst already is src.
      if (dst_reg != src_reg) {
        __ sve_orr(dst_reg, src_reg, src_reg);
      }
      // Copy val into the lanes selected by the predicate.
      __ sve_cpy(dst_reg, size, lane_mask, $val$$Register);
    __ block_comment("} insert into vector (B/H/S)");
  %}
  ins_pipe(pipe_slow);
%}
2618 
// VectorInsert of a long value into a 128-bit vector: copy src to dst when
// they differ, then move val into D lane idx of dst.
instruct insertL_128bits(vReg dst, vReg src, iRegL val, immI idx) %{
  predicate(UseSVE > 0 && Matcher::vector_length_in_bytes(n) == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "mov    $dst, D, $idx, $val\t# insertL into vector(128bits)" %}
  ins_encode %{
    // The lane size is fixed to D by the predicate, so no element type lookup
    // is needed here. The copy is skipped when src already lives in dst.
    if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
      __ orr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
    }
    __ mov(as_FloatRegister($dst$$reg), __ D, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
2636 
// VectorInsert of a long value into a vector longer than 128 bits. A one-lane
// predicate is built by comparing a biased index vector against the immediate
// idx - 16, then val is copied into dst under that predicate.
instruct insertL(vReg dst, vReg src, iRegL val, immI idx, vReg vtmp, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n) > 16 &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP pgtmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "sve_index $vtmp, D, -16, 1\n\t"
            "sve_cmpeq $pgtmp, $vtmp, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (L)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister iota_reg = as_FloatRegister($vtmp$$reg);
    PRegister lane_mask = as_PRegister($pgtmp$$reg);
    __ block_comment("insert into vector (L) {");
      // Lane numbers biased by -16 to match the biased compare immediate.
      __ sve_index(iota_reg, __ D, -16, 1);
      __ sve_cmp(Assembler::EQ, lane_mask, __ D, ptrue,
                 iota_reg, (int)($idx$$constant) - 16);
      // Start from a copy of src unless dst already is src.
      if (dst_reg != src_reg) {
        __ sve_orr(dst_reg, src_reg, src_reg);
      }
      // Copy val into the lanes selected by the predicate.
      __ sve_cpy(dst_reg, __ D, lane_mask, $val$$Register);
    __ block_comment("} insert into vector (L)");
  %}
  ins_pipe(pipe_slow);
%}
2662 
// VectorInsert of a float value into a 64-bit or 128-bit vector: copy src to
// dst, then insert lane 0 of val into S lane idx of dst.
instruct insertF_le128bits(vReg dst, vReg src, vRegF val, immI idx) %{
  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT &&
            (Matcher::vector_length_in_bytes(n) == 8 || Matcher::vector_length_in_bytes(n) == 16));
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(2 * INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "orr    $dst, T8/16B, $src, $src\n\t"
            "ins    $dst, S, $val, $idx, 0\t# insertF into vector(64/128bits)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister val_reg = as_FloatRegister($val$$reg);
    // The copy is unconditional here: dst is declared TEMP_DEF.
    if (Matcher::vector_length_in_bytes(this) == 8) {
      __ orr(dst_reg, __ T8B, src_reg, src_reg);
    } else {
      __ orr(dst_reg, __ T16B, src_reg, src_reg);
    }
    __ ins(dst_reg, __ S, val_reg, $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
2679 
// VectorInsert of a float value into a vector longer than 128 bits when the
// constant lane number is below 32. dst first serves as the index vector for
// the compare, then receives the copy of src and finally the inserted value.
instruct insertF_small_index(vReg dst, vReg src, vRegF val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 && n->in(2)->get_int() < 32 &&
            Matcher::vector_length_in_bytes(n) > 16 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "sve_index $dst, S, -16, 1\n\t"
            "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister val_reg = as_FloatRegister($val$$reg);
    PRegister lane_mask = as_PRegister($pgtmp$$reg);
    __ block_comment("insert into vector (F) {");
      // dst doubles as scratch for the biased lane numbers (it is TEMP_DEF).
      __ sve_index(dst_reg, __ S, -16, 1);
      __ sve_cmp(Assembler::EQ, lane_mask, __ S, ptrue,
                 dst_reg, (int)($idx$$constant) - 16);
      // Now overwrite the scratch contents with src and insert val.
      __ sve_orr(dst_reg, src_reg, src_reg);
      __ sve_cpy(dst_reg, __ S, lane_mask, val_reg);
    __ block_comment("} insert into vector (F)");
  %}
  ins_pipe(pipe_slow);
%}
2702 
// VectorInsert of a float value into a vector longer than 128 bits when the
// constant lane number is 32 or more. The one-lane predicate comes from
// comparing an index vector (tmp1) with a broadcast of idx held in dst.
instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 && n->in(2)->get_int() >= 32 &&
            Matcher::vector_length_in_bytes(n) > 16 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr);
  ins_cost(5 * SVE_COST);
  format %{ "sve_index $tmp1, S, 0, 1\n\t"
            "sve_dup $dst, S, $idx\n\t"
            "sve_cmpeq $pgtmp, $tmp1, $dst\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister val_reg = as_FloatRegister($val$$reg);
    FloatRegister iota_reg = as_FloatRegister($tmp1$$reg);
    PRegister lane_mask = as_PRegister($pgtmp$$reg);
    __ block_comment("insert into vector (F) {");
      // Lane numbers 0, 1, 2, ... in tmp1; idx broadcast into dst as scratch.
      __ sve_index(iota_reg, __ S, 0, 1);
      __ sve_dup(dst_reg, __ S, (int)($idx$$constant));
      __ sve_cmp(Assembler::EQ, lane_mask, __ S, ptrue, iota_reg, dst_reg);
      // Now overwrite the scratch contents with src and insert val.
      __ sve_orr(dst_reg, src_reg, src_reg);
      __ sve_cpy(dst_reg, __ S, lane_mask, val_reg);
    __ block_comment("} insert into vector (F)");
  %}
  ins_pipe(pipe_slow);
%}
2729 
// VectorInsert of a double value into a 128-bit vector: copy src to dst, then
// insert lane 0 of val into D lane idx of dst.
instruct insertD_128bits(vReg dst, vReg src, vRegD val, immI idx) %{
  predicate(UseSVE > 0 && Matcher::vector_length_in_bytes(n) == 16 &&
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(2 * INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "ins    $dst, D, $val, $idx, 0\t# insertD into vector(128bits)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister val_reg = as_FloatRegister($val$$reg);
    // The copy is unconditional here: dst is declared TEMP_DEF.
    __ orr(dst_reg, __ T16B, src_reg, src_reg);
    __ ins(dst_reg, __ D, val_reg, $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
2746 
// VectorInsert of a double value into a vector longer than 128 bits. dst
// first serves as the biased index vector for the compare, then receives the
// copy of src and finally the inserted value.
instruct insertD(vReg dst, vReg src, vRegD val, immI idx, pRegGov pgtmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n) > 16 &&
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "sve_index $dst, D, -16, 1\n\t"
            "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister val_reg = as_FloatRegister($val$$reg);
    PRegister lane_mask = as_PRegister($pgtmp$$reg);
    __ block_comment("insert into vector (D) {");
      // dst doubles as scratch for the biased lane numbers (it is TEMP_DEF).
      __ sve_index(dst_reg, __ D, -16, 1);
      __ sve_cmp(Assembler::EQ, lane_mask, __ D, ptrue,
                 dst_reg, (int)($idx$$constant) - 16);
      // Now overwrite the scratch contents with src and insert val.
      __ sve_orr(dst_reg, src_reg, src_reg);
      __ sve_cpy(dst_reg, __ D, lane_mask, val_reg);
    __ block_comment("} insert into vector (D)");
  %}
  ins_pipe(pipe_slow);
%}
2771 
2772 // ------------------------------ Vector shuffle -------------------------------
2773 
// VectorLoadShuffle: the source holds byte lanes. For a byte result it is
// copied as is; for wider results each byte lane is widened to the lane size
// of the result element type.
instruct loadshuffle(vReg dst, vReg src) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorLoadShuffle src));
  ins_cost(SVE_COST);
  format %{ "sve_loadshuffle $dst, $src\t# vector load shuffle (B/H/S/D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    BasicType bt = Matcher::vector_element_basic_type(this);
    if (bt != T_BYTE) {
      // Wider lanes: widen every byte lane.
      __ sve_vector_extend(dst_reg, __ elemType_to_regVariant(bt), src_reg, __ B);
    } else if (dst_reg != src_reg) {
      // Byte lanes: a register copy, and only when one is needed.
      __ sve_orr(dst_reg, src_reg, src_reg);
    }
  %}
  ins_pipe(pipe_slow);
%}
2793 
2794 // ------------------------------ Vector rearrange -------------------------------
2795 
// VectorRearrange: one table lookup (sve_tbl) of src through the shuffle
// vector, using the lane size of the element type of src.
instruct rearrange(vReg dst, vReg src, vReg shuffle)
%{
  predicate(UseSVE > 0);
  match(Set dst (VectorRearrange src shuffle));
  ins_cost(SVE_COST);
  format %{ "sve_tbl $dst, $src, $shuffle\t# vector rearrange" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister shuffle_reg = as_FloatRegister($shuffle$$reg);
    Assembler::SIMD_RegVariant size =
        __ elemType_to_regVariant(Matcher::vector_element_basic_type(this, $src));
    __ sve_tbl(dst_reg, size, src_reg, shuffle_reg);
  %}
  ins_pipe(pipe_slow);
%}
2810 
2811 // ------------------------------ Vector Load Gather ---------------------------------
2812 
// LoadVectorGather of int or float elements when the gather covers a full
// vector (memory size equals MaxVectorSize): one gather load under ptrue.
instruct gatherI(vReg dst, indirect mem, vReg idx) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (LoadVectorGather mem idx));
  ins_cost(SVE_COST);
  format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister idx_reg = as_FloatRegister($idx$$reg);
    Register base = as_Register($mem$$base);
    __ sve_ld1w_gather(dst_reg, ptrue, base, idx_reg);
  %}
  ins_pipe(pipe_slow);
%}
2827 
// LoadVectorGather of long or double elements when the gather covers a full
// vector (memory size equals MaxVectorSize). The index vector is unpacked to
// D-sized lanes before the gather load.
//
// The unpacked indices go into a dedicated TEMP vector. idx is a plain input
// operand with no declared effect, so this rule must leave it intact in case
// the allocator keeps the value live for later users.
instruct gatherL(vReg dst, indirect mem, vReg idx, vReg vtmp) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP vtmp);
  ins_cost(2 * SVE_COST);
  format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (D)" %}
  ins_encode %{
    // Widen the low index lanes into vtmp, leaving idx untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base),
                       as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2843 
2844 // ------------------------------ Vector Load Gather Partial-------------------------------
2845 
// LoadVectorGather of int or float elements when the gather covers less than
// a full vector (memory size below MaxVectorSize): a predicate covering the
// vector length of this node is built first and governs the gather load.
instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST + INSN_COST);
  format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister idx_reg = as_FloatRegister($idx$$reg);
    PRegister active = as_PRegister($ptmp$$reg);
    Register base = as_Register($mem$$base);
    // One S lane per element of this node.
    __ sve_ptrue_lanecnt(active, __ S, Matcher::vector_length(this));
    __ sve_ld1w_gather(dst_reg, active, base, idx_reg);
  %}
  ins_pipe(pipe_slow);
%}
2862 
// Gather load of 64-bit elements (long/double) when the vector is smaller than
// MaxVectorSize. sve_ptrue_lanecnt sets up ptmp for vector_length lanes of size D,
// sve_uunpklo widens the low-half lanes of idx to the D lane size, and the load is
// governed by ptmp. The widened indices live in the scratch vector tmp so that the
// idx input is not modified by this rule.
instruct gatherL_partial(vReg dst, indirect mem, vReg idx, vReg tmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP tmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (D)" %}
  ins_encode %{
    __ sve_ptrue_lanecnt(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this));
    // idx is a pure input of this rule: widen into tmp instead of in place.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg),
                       as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2880 
2881 // ------------------------------ Vector Load Gather Predicated -------------------------------
2882 
// Predicated gather load of 32-bit elements (int/float): the mask operand pg of
// LoadVectorGatherMasked is passed to sve_ld1w_gather as the governing predicate.
instruct gatherI_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{
  predicate(UseSVE > 0 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_INT));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
  ins_cost(SVE_COST);
  format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (S)" %}
  ins_encode %{
    const FloatRegister loaded = as_FloatRegister($dst$$reg);
    const FloatRegister indices = as_FloatRegister($idx$$reg);
    const Register base_reg = as_Register($mem$$base);
    __ sve_ld1w_gather(loaded, as_PRegister($pg$$reg), base_reg, indices);
  %}
  ins_pipe(pipe_slow);
%}
2896 
// Predicated gather load of 64-bit elements (long/double): the mask operand pg of
// LoadVectorGatherMasked governs sve_ld1d_gather. sve_uunpklo widens the low-half
// lanes of idx to the D lane size; the widened copy is written to the scratch
// vector tmp so that the idx input is not modified by this rule.
instruct gatherL_masked(vReg dst, indirect mem, vReg idx, pRegGov pg, vReg tmp) %{
  predicate(UseSVE > 0 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
  effect(TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (D)" %}
  ins_encode %{
    // idx is a pure input of this rule: widen into tmp instead of in place.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg),
                       as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2911 
2912 // ------------------------------ Vector Store Scatter -------------------------------
2913 
// Scatter store of 32-bit elements (int/float) for vectors whose memory size
// equals MaxVectorSize; the store is governed by ptrue. The element type is read
// from the stored value, reached through in(3)->in(1) of the matched node.
instruct scatterI(indirect mem, vReg src, vReg idx) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  ins_cost(SVE_COST);
  format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (S)" %}
  ins_encode %{
    const FloatRegister values = as_FloatRegister($src$$reg);
    const FloatRegister indices = as_FloatRegister($idx$$reg);
    const Register base_reg = as_Register($mem$$base);
    __ sve_st1w_scatter(values, ptrue, base_reg, indices);
  %}
  ins_pipe(pipe_slow);
%}
2928 
// Scatter store of 64-bit elements (long/double) for vectors whose memory size
// equals MaxVectorSize; the store is governed by ptrue. sve_uunpklo widens the
// low-half lanes of idx to the D lane size; the widened copy is written to the
// scratch vector tmp so that the idx input is not modified by this rule.
instruct scatterL(indirect mem, vReg src, vReg idx, vReg tmp) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (D)" %}
  ins_encode %{
    // idx is a pure input of this rule: widen into tmp instead of in place.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue,
                        as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2944 
2945 // ------------------------------ Vector Store Scatter Partial -------------------------------
2946 
// Scatter store of 32-bit elements (int/float) when the memory size is smaller
// than MaxVectorSize. sve_ptrue_lanecnt sets up ptmp for the lane count of src
// at size S and the store is governed by ptmp instead of ptrue.
instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST + INSN_COST);
  format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (S)" %}
  ins_encode %{
    const PRegister lane_mask = as_PRegister($ptmp$$reg);
    const FloatRegister values = as_FloatRegister($src$$reg);
    const FloatRegister indices = as_FloatRegister($idx$$reg);
    __ sve_ptrue_lanecnt(lane_mask, __ S, Matcher::vector_length(this, $src));
    __ sve_st1w_scatter(values, lane_mask, as_Register($mem$$base), indices);
  %}
  ins_pipe(pipe_slow);
%}
2963 
// Scatter store of 64-bit elements (long/double) when the memory size is smaller
// than MaxVectorSize. sve_ptrue_lanecnt sets up ptmp for the lane count of src at
// size D, sve_uunpklo widens the low-half lanes of idx to the D lane size, and the
// store is governed by ptmp. The widened indices live in the scratch vector tmp so
// that the idx input is not modified by this rule.
instruct scatterL_partial(indirect mem, vReg src, vReg idx, vReg tmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (D)" %}
  ins_encode %{
    __ sve_ptrue_lanecnt(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this, $src));
    // idx is a pure input of this rule: widen into tmp instead of in place.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg),
                        as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2981 
2982 // ------------------------------ Vector Store Scatter Predicated -------------------------------
2983 
// Predicated scatter store of 32-bit elements (int/float): the mask operand pg of
// StoreVectorScatterMasked is passed to sve_st1w_scatter as the governing
// predicate. The format annotation reads "predicated", matching the other masked
// gather/scatter rules.
instruct scatterI_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{
  predicate(UseSVE > 0 &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
  ins_cost(SVE_COST);
  format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (S)" %}
  ins_encode %{
    __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2997 
// Predicated scatter store of 64-bit elements (long/double): the mask operand pg
// of StoreVectorScatterMasked governs sve_st1d_scatter. sve_uunpklo widens the
// low-half lanes of idx to the D lane size; the widened copy is written to the
// scratch vector tmp so that the idx input is not modified by this rule.
instruct scatterL_masked(indirect mem, vReg src, vReg idx, pRegGov pg, vReg tmp) %{
  predicate(UseSVE > 0 &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
  effect(TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (D)" %}
  ins_encode %{
    // idx is a pure input of this rule: widen into tmp instead of in place.
    __ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
                        as_Register($mem$$base), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
3012 
3013 // ------------------------------ Vector Load Const -------------------------------
3014 
// VectorLoadConst with a zero immediate and byte elements: dst is filled by
// sve_index with start value 0 and step 1 (the iota indices named in the format).
instruct loadconB(vReg dst, immI0 src) %{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadConst src));
  ins_cost(SVE_COST);
  format %{ "sve_index $dst, 0, 1\t# generate iota indices" %}
  ins_encode %{
    const FloatRegister iota = as_FloatRegister($dst$$reg);
    __ sve_index(iota, __ B, 0, 1);
  %}
  ins_pipe(pipe_slow);
%}
3026 
// Intrinsics for String.indexOf(char)
3028 
3029 dnl
dnl STRING_INDEXOF_CHAR($1,       $2,            $3  )
dnl STRING_INDEXOF_CHAR(encoding, encoding_name, isL )
dnl Emits one StrIndexOfChar rule per string encoding. The operands are pinned to
dnl R1/R2/R3 with the result in R0, and all vector and predicate scratch registers
dnl are handed to the string_indexof_char_sve stub together with the isL flag.
define(`STRING_INDEXOF_CHAR', `
instruct string$1_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                                  iRegI_R0 result, vReg ztmp1, vReg ztmp2,
                                  pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::$1));
  effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);

  format %{ "String$2 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}

  ins_encode %{
    const FloatRegister vscratch1 = as_FloatRegister($ztmp1$$reg);
    const FloatRegister vscratch2 = as_FloatRegister($ztmp2$$reg);
    const PRegister gov_scratch = as_PRegister($pgtmp$$reg);
    const PRegister pred_scratch = as_PRegister($ptmp$$reg);
    __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                               vscratch1, vscratch2, gov_scratch, pred_scratch, $3 /* isL */);
  %}
  ins_pipe(pipe_class_memory);
%}')dnl
dnl                 $1 $2      $3
STRING_INDEXOF_CHAR(L, Latin1, true)
STRING_INDEXOF_CHAR(U, UTF16,  false)
3051 
3052 // ---------------------------- Vector mask reductions ---------------------------
// VectorMaskTrueCount: sve_cntp under ptrue counts the lanes of src that are set,
// using the lane size derived from the element type of the mask.
instruct vmask_truecount(iRegINoSp dst, pReg src) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorMaskTrueCount src));
  ins_cost(SVE_COST);
  format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ sve_cntp($dst$$Register, __ elemType_to_regVariant(elem_bt),
                ptrue, as_PRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
3065 
// VectorMaskFirstTrue: yields the index of the first set mask lane, or the vector
// length when no lane is set. sve_brkb writes a predicate into ptmp and sve_cntp
// then counts its lanes.
instruct vmask_firsttrue(iRegINoSp dst, pReg src, pReg ptmp) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorMaskFirstTrue src));
  effect(TEMP ptmp);
  ins_cost(3 * SVE_COST);
  format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister mask = as_PRegister($src$$reg);
    const PRegister scratch = as_PRegister($ptmp$$reg);
    // For an all-false input the result has to be the vector length rather than
    // the max vector register size, so a partial vector limits the break
    // operation to its own lanes.
    PRegister gov = ptrue;
    if (Matcher::vector_length_in_bytes(this, $src) != MaxVectorSize) {
      __ sve_ptrue_lanecnt(scratch, variant, Matcher::vector_length(this, $src));
      gov = scratch;
    }
    __ sve_brkb(scratch, gov, mask, false);
    __ sve_cntp($dst$$Register, variant, ptrue, scratch);
  %}
  ins_pipe(pipe_slow);
%}
3090 
// VectorMaskLastTrue: delegates to the sve_vmask_lasttrue assembler helper, which
// receives the element type of the mask and ptmp as a scratch predicate.
instruct vmask_lasttrue(iRegINoSp dst, pReg src, pReg ptmp) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorMaskLastTrue src));
  effect(TEMP ptmp);
  ins_cost(3 * SVE_COST);
  format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %}
  ins_encode %{
    const PRegister mask = as_PRegister($src$$reg);
    const PRegister scratch = as_PRegister($ptmp$$reg);
    __ sve_vmask_lasttrue($dst$$Register, Matcher::vector_element_basic_type(this, $src),
                          mask, scratch);
  %}
  ins_pipe(pipe_slow);
%}
3103 
// VectorMaskToLong for masks of at most 64 lanes: delegates to the
// sve_vmask_tolong assembler helper, passing the element type, the lane count and
// two scratch vectors.
instruct vmask_tolong(iRegLNoSp dst, pReg src, vReg vtmp1, vReg vtmp2) %{
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length() <= 64);
  match(Set dst (VectorMaskToLong src));
  effect(TEMP vtmp1, TEMP vtmp2);
  ins_cost(13 * SVE_COST);
  format %{ "vmask_tolong $dst, $src\t# vector mask tolong (sve)" %}
  ins_encode %{
    const FloatRegister scratch1 = as_FloatRegister($vtmp1$$reg);
    const FloatRegister scratch2 = as_FloatRegister($vtmp2$$reg);
    const PRegister mask = as_PRegister($src$$reg);
    __ sve_vmask_tolong(as_Register($dst$$reg), mask,
                        Matcher::vector_element_basic_type(this, $src),
                        Matcher::vector_length(this, $src),
                        scratch1, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
3119 
3120 // ---------------------------- Vector mask generation ---------------------------
// The rules below set predicate registers. They guarantee that the high bits of dst
// are cleared to zero when the vector length is less than the full hardware vector
// register width.
3124 
dnl MASKALL_IMM($1,            $2        )
dnl MASKALL_IMM(imm_type_abbr, lane_sizes)
dnl Emits the MaskAll rule for an immediate source. A zero constant clears dst with
dnl sve_pfalse; the only other accepted constant is -1, for which sve_ptrue_lanecnt
dnl is called with the lane count of the node.
define(`MASKALL_IMM', `
instruct vmaskAll_imm$1(pRegGov dst, imm$1 src) %{
  predicate(UseSVE > 0);
  match(Set dst (MaskAll src));
  ins_cost(SVE_COST);
  format %{ "sve_ptrue_lanecnt/sve_pfalse $dst\t# mask all (sve) ($2)" %}
  ins_encode %{
    const PRegister mask = as_PRegister($dst$$reg);
    ifelse($1, `I', int, long) con = (ifelse($1, `I', int, long))$src$$constant;
    if (con != 0) {
      assert(con == -1, "invalid constant value for mask");
      BasicType elem_bt = Matcher::vector_element_basic_type(this);
      __ sve_ptrue_lanecnt(mask, __ elemType_to_regVariant(elem_bt),
                           Matcher::vector_length(this));
    } else {
      __ sve_pfalse(mask);
    }
  %}
  ins_pipe(pipe_slow);
%}')dnl
3144 
dnl MASKALL($1,            $2        )
dnl MASKALL(src_type_abbr, lane_sizes)
dnl Emits the MaskAll rule for a register source. The scalar is broadcast into tmp
dnl and compared against zero with NE; a vector smaller than MaxVectorSize first
dnl sets up dst through sve_ptrue_lanecnt and uses it as the governing predicate.
define(`MASKALL', `
instruct vmaskAll$1(pRegGov dst, ifelse($1, `I', iRegIorL2I, iRegL) src, vReg tmp, rFlagsReg cr) %{
  predicate(UseSVE > 0);
  match(Set dst (MaskAll src));
  effect(TEMP tmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_dup $tmp, $src\n\t"
            "sve_ptrue $dst, vector_length\n\t"
            "sve_cmpne $dst, $dst, $tmp, 0\t# mask all (sve) ($2)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister mask = as_PRegister($dst$$reg);
    const FloatRegister splat = as_FloatRegister($tmp$$reg);
    __ sve_dup(splat, variant, as_Register($src$$reg));
    PRegister gov = ptrue;
    if (Matcher::vector_length_in_bytes(this) < MaxVectorSize) {
      __ sve_ptrue_lanecnt(mask, variant, Matcher::vector_length(this));
      gov = mask;
    }
    __ sve_cmp(Assembler::NE, mask, variant, gov, splat, 0);
  %}
  ins_pipe(pipe_slow);
%}')dnl
3169 dnl
// maskAll (full or partial predicate size)
dnl Each source type gets an immediate rule and a register rule.
dnl         $1 = type abbreviation, $2 = lane sizes printed in the format string
MASKALL_IMM(I, B/H/S)
MASKALL(I, B/H/S)
MASKALL_IMM(L, D)
MASKALL(L, D)
3175 
3176 // vector mask compare
3177 
// VectorMaskCmp without a mask input. A vector of MaxVectorSize bytes compares
// under ptrue; a shorter vector first sets up dst through sve_ptrue_lanecnt and
// then uses dst as the governing predicate of the compare.
instruct vmaskcmp(pRegGov dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_cmp $dst, $src1, $src2\t# vector mask cmp (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const PRegister mask = as_PRegister($dst$$reg);
    PRegister gov = ptrue;
    if (Matcher::vector_length_in_bytes(this) != MaxVectorSize) {
      __ sve_ptrue_lanecnt(mask, __ elemType_to_regVariant(elem_bt),
                           Matcher::vector_length(this));
      gov = mask;
    }
    __ sve_compare(mask, elem_bt, gov, as_FloatRegister($src1$$reg),
                   as_FloatRegister($src2$$reg), (int)$cond$$constant);
  %}
  ins_pipe(pipe_slow);
%}
3199 
// VectorMaskCmp with a mask input: the compare is governed directly by pg.
instruct vmaskcmp_masked(pRegGov dst, vReg src1, vReg src2, immI cond, pRegGov pg, rFlagsReg cr) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond pg)));
  effect(KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_cmp $dst, $pg, $src1, $src2\t# vector mask cmp (sve)" %}
  ins_encode %{
    const PRegister mask = as_PRegister($dst$$reg);
    const PRegister gov = as_PRegister($pg$$reg);
    const FloatRegister lhs = as_FloatRegister($src1$$reg);
    const FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ sve_compare(mask, Matcher::vector_element_basic_type(this), gov, lhs, rhs,
                   (int)$cond$$constant);
  %}
  ins_pipe(pipe_slow);
%}
3213 
3214 // vector load mask
3215 
// VectorLoadMask for byte elements: dst gets the lanes of src that compare NE
// against zero at lane size B, governed by ptrue.
instruct vloadmaskB(pRegGov dst, vReg src, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(KILL cr);
  ins_cost(SVE_COST);
  format %{ "vloadmaskB $dst, $src\t# vector load mask (sve) (B)" %}
  ins_encode %{
    const PRegister mask = as_PRegister($dst$$reg);
    const FloatRegister bools = as_FloatRegister($src$$reg);
    __ sve_cmp(Assembler::NE, mask, __ B, ptrue, bools, 0);
  %}
  ins_pipe(pipe_slow);
%}
3229 
// VectorLoadMask for non-byte elements: sve_vector_extend brings src from lane
// size B to the lane size of the result element type in tmp, and dst gets the
// lanes of tmp that compare NE against zero, governed by ptrue.
instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() != T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP tmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "vloadmask $dst, $src\t# vector load mask (sve) (H/S/D)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const FloatRegister widened = as_FloatRegister($tmp$$reg);
    __ sve_vector_extend(widened, variant, as_FloatRegister($src$$reg), __ B);
    __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), variant, ptrue, widened, 0);
  %}
  ins_pipe(pipe_slow);
%}
3244 
3245 // ---------------------------- Compress/Expand Operations ---------------------------
3246 
// CompressM: sve_cntp puts the number of set lanes of pg into rscratch1, and
// sve_whilelo then builds dst from zr and that count.
instruct mcompress(pReg dst, pReg pg, rFlagsReg cr) %{
  predicate(UseSVE > 0);
  match(Set dst (CompressM pg));
  effect(KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_cntp rscratch1, $pg\n\t"
            "sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister in_mask = as_PRegister($pg$$reg);
    const PRegister out_mask = as_PRegister($dst$$reg);
    __ sve_cntp(rscratch1, variant, ptrue, in_mask);
    __ sve_whilelo(out_mask, variant, zr, rscratch1);
  %}
  ins_pipe(pipe_slow);
%}
3262 
// CompressV for int, float, long and double elements: a single sve_compact at the
// lane size of the element type.
instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
  predicate(UseSVE > 0 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (CompressV src pg));
  ins_cost(SVE_COST);
  format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const FloatRegister packed = as_FloatRegister($dst$$reg);
    const FloatRegister input = as_FloatRegister($src$$reg);
    __ sve_compact(packed, __ elemType_to_regVariant(elem_bt), input,
                   as_PRegister($pg$$reg));
  %}
  ins_pipe(pipe_slow);
%}
3279 
// CompressV for byte elements: delegates to the sve_compress_byte assembler
// helper, which is given four scratch vectors and two scratch predicates.
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4,
                    pReg ptmp, pRegGov pgtmp) %{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (CompressV src pg));
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp);
  ins_cost(13 * SVE_COST);
  format %{ "sve_compact $dst, $src, $pg\t# vector compress (B)" %}
  ins_encode %{
    const FloatRegister packed = as_FloatRegister($dst$$reg);
    const FloatRegister input = as_FloatRegister($src$$reg);
    const PRegister mask = as_PRegister($pg$$reg);
    __ sve_compress_byte(packed, input, mask,
                         as_FloatRegister($vtmp1$$reg), as_FloatRegister($vtmp2$$reg),
                         as_FloatRegister($vtmp3$$reg), as_FloatRegister($vtmp4$$reg),
                         as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
3295 
// CompressV for short elements: delegates to the sve_compress_short assembler
// helper, which is given two scratch vectors and one scratch predicate.
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (CompressV src pg));
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp);
  ins_cost(38 * SVE_COST);
  format %{ "sve_compact $dst, $src, $pg\t# vector compress (H)" %}
  ins_encode %{
    const FloatRegister packed = as_FloatRegister($dst$$reg);
    const FloatRegister input = as_FloatRegister($src$$reg);
    const PRegister mask = as_PRegister($pg$$reg);
    __ sve_compress_short(packed, input, mask,
                          as_FloatRegister($vtmp1$$reg), as_FloatRegister($vtmp2$$reg),
                          as_PRegister($pgtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
3308 
// ExpandV for non-subword element types. The encoding asserts UseSVE == 2.
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
  match(Set dst (ExpandV src pg));
  effect(TEMP_DEF dst);
  ins_cost(4 * SVE_COST);
  format %{ "sve_dup $dst, S/D, 0\n\t"
            "sve_histcnt $dst, S/D, $pg, $dst, $dst\n\t"
            "sve_sub $dst, S/D, 1\n\t"
            "sve_tbl $dst, S/D, $src, $dst\t# vector expand (S/D)" %}
  ins_encode %{
    // Worked example:
    //   src             = 1 2 3 4 5 6 7 8
    //   pg              = 1 0 0 1 1 0 1 1
    //   expected result = 4 0 0 5 6 0 7 8
    //
    // TBL can shuffle the elements of a vector freely, so the work is to build
    // its selector operand: HISTCNT followed by SUB produces, in dst, the value
    // that picks the wanted element of src for every lane.

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(UseSVE == 2 && !is_subword_type(elem_bt), "unsupported");
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const FloatRegister selector = as_FloatRegister($dst$$reg);
    const PRegister mask = as_PRegister($pg$$reg);

    // selector = 0 0 0 0 0 0 0 0
    __ sve_dup(selector, variant, 0);
    // selector = 5 0 0 4 3 0 2 1
    __ sve_histcnt(selector, variant, mask, selector, selector);
    // selector = 4 -1 -1 3 2 -1 1 0
    __ sve_sub(selector, variant, 1);
    // dst = 4 0 0 5 6 0 7 8
    __ sve_tbl(selector, variant, as_FloatRegister($src$$reg), selector);
  %}
  ins_pipe(pipe_slow);
%}
3342 
// VectorMaskGen: sve_whilelo builds pg from zr and the length register, at the
// lane size of the element type of the node.
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
  predicate(UseSVE > 0);
  match(Set pg (VectorMaskGen len));
  effect(KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_whilelo $pg, zr, $len\t # sve" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const PRegister mask = as_PRegister($pg$$reg);
    const Register limit = as_Register($len$$reg);
    __ sve_whilelo(mask, __ elemType_to_regVariant(elem_bt), zr, limit);
  %}
  ins_pipe(pipe_slow);
%}
3356 
3357 dnl
3358 dnl BITWISE_UNARY($1,        $2,      $3  )
3359 dnl BITWISE_UNARY(insn_name, op_name, insn)
dnl Emits the unpredicated rule for the per-lane operation $2; the rule only
dnl matches nodes that are not predicated vectors, and $3 runs under ptrue.
dnl Two operations get extra code:
dnl   - CountTrailingZerosV first runs sve_rbit from src into dst and then applies
dnl     $3 to dst; its cost is 2 * SVE_COST instead of SVE_COST.
dnl   - ReverseBytesV with T_BYTE elements only copies src to dst through sve_orr
dnl     when the two registers differ; every other element type uses $3.
define(`BITWISE_UNARY', `
instruct $1(vReg dst, vReg src) %{
  predicate(UseSVE > 0 &&
            !n->as_Vector()->is_predicated_vector());
  match(Set dst ($2 src));
  ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
  format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst, $src\n\t"
            "$3  $dst, $dst', `"$3 $dst, $src')\t# vector (sve)" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
ifelse($2, `CountTrailingZerosV', `
    __ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));', `')dnl
ifelse($2, `ReverseBytesV', `
    if (bt == T_BYTE) {
      if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
        __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
      }
    } else {
      __ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
    }', `
    __ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($ifelse($2, `CountTrailingZerosV', dst, src)$$reg));')
  %}
  ins_pipe(pipe_slow);
%}')dnl
3385 dnl
3386 dnl BITWISE_UNARY_PREDICATE($1,        $2,      $3  )
3387 dnl BITWISE_UNARY_PREDICATE(insn_name, op_name, insn)
dnl Emits the predicated rule $1_masked for the per-lane operation $2. The single
dnl vector operand dst_src is both input and output and every instruction is
dnl governed by pg. Two operations get extra code:
dnl   - CountTrailingZerosV first runs sve_rbit on dst_src and then applies $3;
dnl     its cost is 2 * SVE_COST instead of SVE_COST.
dnl   - ReverseBytesV with T_BYTE elements emits no instruction at all; every
dnl     other element type uses $3.
define(`BITWISE_UNARY_PREDICATE', `
// The dst and src should use the same register to make sure the
// inactive lanes in dst save the same elements as src.
instruct $1_masked(vReg dst_src, pRegGov pg) %{
  predicate(UseSVE > 0);
  match(Set dst_src ($2 dst_src pg));
  ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
  format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst_src, $pg, $dst_src\n\t"
            "$3  $dst_src, $pg, $dst_src', `"$3 $dst_src, $pg, $dst_src')\t# vector (sve)" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
ifelse($2, `CountTrailingZerosV', `
    __ sve_rbit(as_FloatRegister($dst_src$$reg), size,
        as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));', `')dnl
ifelse($2, `ReverseBytesV', `
    if (bt == T_BYTE) {
      // do nothing
    } else {
      __ $3(as_FloatRegister($dst_src$$reg), size,
          as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
    }', `
    __ $3(as_FloatRegister($dst_src$$reg), size,
        as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));')
  %}
  ins_pipe(pipe_slow);
%}')dnl
3415 dnl
dnl Each operation below gets an unpredicated rule and a _masked rule.
dnl Arguments: $1 = rule name, $2 = ideal node name, $3 = assembler routine.
dnl CountTrailingZerosV passes sve_clz because its rules run sve_rbit first.
// ------------------------------ CountLeadingZerosV ------------------------------
BITWISE_UNARY(vcountLeadingZeros, CountLeadingZerosV, sve_clz)
BITWISE_UNARY_PREDICATE(vcountLeadingZeros, CountLeadingZerosV, sve_clz)

// ------------------------------ CountTrailingZerosV -----------------------------
BITWISE_UNARY(vcountTrailingZeros, CountTrailingZerosV, sve_clz)
BITWISE_UNARY_PREDICATE(vcountTrailingZeros, CountTrailingZerosV, sve_clz)

// ---------------------------------- ReverseV ------------------------------------
BITWISE_UNARY(vreverse, ReverseV, sve_rbit)
BITWISE_UNARY_PREDICATE(vreverse, ReverseV, sve_rbit)

// -------------------------------- ReverseBytesV ---------------------------------
BITWISE_UNARY(vreverseBytes, ReverseBytesV, sve_revb)
BITWISE_UNARY_PREDICATE(vreverseBytes, ReverseBytesV, sve_revb)
3431