1 //
   2 // Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2020, 2021, Arm Limited. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 dnl Generate the warning
  27 // This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
  28 dnl
  29 
  30 // AArch64 SVE Architecture Description File
  31 
  32 dnl // Maps a one-letter type abbreviation (B, S, I, L, F, D) to BYTE, SHORT, INT, LONG, FLOAT or DOUBLE; any other argument expands to the text error(argument).
  33 define(`TYPE2DATATYPE',
  34 `ifelse($1, `B', `BYTE',
  35         $1, `S', `SHORT',
  36         $1, `I', `INT',
  37         $1, `L', `LONG',
  38         $1, `F', `FLOAT',
  39         $1, `D', `DOUBLE',
  40         `error($1)')')dnl
  41 dnl
  42 dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1,            $2,       $3,      $4   )
  43 dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len, scale)
  44 define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
     // Immediate offset operand: a Con$1 constant that is accepted only when
     // Address::offset_ok_for_sve_immed approves it for an immediate length of $3.
  45 operand vmemA_imm$1Offset$3()
  46 %{
  47   // (esize / msize) = $4
       // The last argument is scalable_vector_reg_size(T_BYTE), divided by the scale unless the scale is 1.
  48   predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
  49             Matcher::scalable_vector_reg_size(T_BYTE)ifelse($4, `1', `', ` / $4')));
  50   match(Con$1);
  51 
  52   op_cost(0);
  53   format %{ %}
  54   interface(CONST_INTER);
  55 %}')dnl
  56 
  57 // 4 bit signed offset -- for predicated load/store (int and long constants, scale 1)
  58 OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int,  4, 1)
  59 OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4, 1)
  60 dnl
  61 dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1,            $2     )
  62 dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
  63 define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
     // Memory operand for the address form [reg, off]: a pointer register plus a
     // constant offset that satisfies the vmemA_imm$1Offset$2 operand.
  64 operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
  65 %{
  66   constraint(ALLOC_IN_RC(ptr_reg));
  67   match(AddP reg off);
  68   op_cost(0);
  69   format %{ "[$reg, $off]" %}
  70   interface(MEMORY_INTER) %{
  71     base($reg);
  72     `index'(0xffffffff);  // -1 as an int, the only value loadStoreA_predicated accepts
  73     scale(0x0);
  74     disp($off);
  75   %}
  76 %}')dnl
  77 OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
  78 OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
  79 
  80 // vmemA groups the plain indirect form with the two base plus 4 bit immediate offset forms.
  81 // The indOff forms are valid only when the vector element size (load to/store from) equals the memory element size (load from/store to).
  82 opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
  83 
  84 source_hpp %{
       // Support queries implemented in the source section of this file.
       // op_sve_supported: true when opcode is handled for vlen elements of type bt.
  85   bool op_sve_supported(int opcode, int vlen, BasicType bt);
       // masked_op_sve_supported: the same query for the masked form of the operation.
  86   bool masked_op_sve_supported(int opcode, int vlen, BasicType bt);
  87 %}
  88 
  89 source %{
  90 
  91   typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
  92                                                              PRegister Pg, const Address &adr);
  93   // sve_mem_insn_predicate: pointer to the C2_MacroAssembler load or store member chosen below.
  94   // Predicated load/store, with optional ptrue to all elements of given predicate register.
       //
       //   masm           - assembler that emits the instruction (received by value)
       //   is_store       - true selects sve_st1b/h/w/d, false selects sve_ld1b/h/w/d
       //   reg            - vector register that is loaded or stored
       //   pg             - governing predicate register
       //   mem_elem_bt    - element type in memory; its byte size picks the b/h/w/d member
       //   vector_elem_bt - element type in the register; picks the register variant and scales disp
       //   opcode         - not referenced by this function
       //   base, disp     - base register and displacement of the address
       //   index, size    - must be -1 and 0; every other addressing form is unimplemented
  95   static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg,
  96                                     PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt,
  97                                     int opcode, Register base, int index, int size, int disp) {
  98     sve_mem_insn_predicate insn;
  99     int mesize = type2aelembytes(mem_elem_bt);
 100     if (index == -1) {
 101       assert(size == 0, "unsupported address mode: scale size = %d", size);
           // Choose the member by memory element size in bytes.
 102       switch(mesize) {
 103       case 1:
 104         insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
 105         break;
 106       case 2:
 107         insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
 108         break;
 109       case 4:
 110         insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
 111         break;
 112       case 8:
 113         insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
 114         break;
 115       default:
 116         assert(false, "unsupported");
 117         ShouldNotReachHere();
 118       }
           // Turn disp into the immediate handed to Address(base, imm4): divide by the memory
           // element size and by scalable_vector_reg_size(vector_elem_bt).
           // NOTE(review): presumably this yields an offset counted in whole vectors - confirm.
 119       int imm4 = disp / mesize / Matcher::scalable_vector_reg_size(vector_elem_bt);
 120       (masm.*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4));
 121     } else {
 122       assert(false, "unimplemented");
 123       ShouldNotReachHere();
 124     }
 125   }
 126 
 127   bool op_sve_supported(int opcode, int vlen, BasicType bt) {
         // Returns true when opcode is handled for a vector of vlen elements of type bt.
 128     int length_in_bytes = vlen * type2aelembytes(bt);
 129     switch (opcode) {
 130       case Op_MulAddVS2VI:
 131       // No multiply reduction instructions
 132       case Op_MulReductionVD:
 133       case Op_MulReductionVF:
 134       case Op_MulReductionVI:
 135       case Op_MulReductionVL:
 136       // Others
 137       case Op_ExtractC:
 138       case Op_ExtractUB:
 139         return false;
 140       // Vector API specific
           // These need at least 4 elements and a total size within MaxVectorSize.
 141       case Op_VectorLoadShuffle:
 142       case Op_VectorRearrange:
 143         return vlen >= 4 && length_in_bytes <= MaxVectorSize;
           // Loads and stores defer to Matcher::vector_size_supported.
 144       case Op_LoadVector:
 145       case Op_StoreVector:
 146         return Matcher::vector_size_supported(bt, vlen);
 147       default:
 148         break;
 149     }
 150     // By default, we only support vector operations with no less than 8 bytes and 2 elements.
 151     return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2;
 152   }
 153 
 154   bool masked_op_sve_supported(int opcode, int vlen, BasicType bt) {
         // Masked variant of the support query: Op_VectorRearrange is rejected,
         // every other opcode gets the same answer as op_sve_supported.
 155     if (opcode == Op_VectorRearrange) {
 156       return false;
 157     }
 158     return op_sve_supported(opcode, vlen, bt);
 159   }
 160 
 161 %}
 162 
 163 definitions %{
       // Cost unit referenced by the ins_cost() clauses of the SVE rules in this file.
 164   int_def SVE_COST             (200, 200);
 165 %}
 166 
 167 dnl
 168 dnl ELEMENT_SHORT_CHAR($1, $2)
 169 dnl ELEMENT_SHORT_CHAR(etype, node)
 170 define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
 171   `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
 172             ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
 173    `($2->bottom_type()->is_vect()->element_basic_type() == $1)')')dnl // Expands to an element-type test on the node: T_SHORT also accepts T_CHAR, any other type is compared as given.
 174 dnl
 175 
 176 // All SVE instructions
 177 
 178 // vector load/store
 179 
 180 // Unpredicated vector load/store
     // loadV: LoadVector whose memory size is at least 16 bytes and equals MaxVectorSize.
     // The encoding calls loadStoreA_predicated with ptrue and uses the same
     // basic type for the memory element and the vector element.
 181 instruct loadV(vReg dst, vmemA mem) %{
 182   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16 &&
 183             n->as_LoadVector()->memory_size() == MaxVectorSize);
 184   match(Set dst (LoadVector mem));
 185   ins_cost(4 * SVE_COST);
 186   format %{ "sve_ldr $dst, $mem\t# vector (sve)" %}
 187   ins_encode %{
 188     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 189     BasicType bt = Matcher::vector_element_basic_type(this);
 190     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
 191                           bt, bt, $mem->opcode(),
 192                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 193   %}
 194   ins_pipe(pipe_slow);
 195 %}
 196 
 197 instruct storeV(vReg src, vmemA mem) %{
       // StoreVector whose memory size is at least 16 bytes and equals MaxVectorSize.
       // Stored under ptrue; the element type is taken from the src operand.
 198   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16 &&
 199             n->as_StoreVector()->memory_size() == MaxVectorSize);
 200   match(Set mem (StoreVector mem src));
 201   ins_cost(4 * SVE_COST);
 202   format %{ "sve_str $mem, $src\t# vector (sve)" %}
 203   ins_encode %{
 204     FloatRegister src_reg = as_FloatRegister($src$$reg);
 205     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 206     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
 207                           bt, bt, $mem->opcode(),
 208                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 209   %}
 210   ins_pipe(pipe_slow);
 211 %}dnl
 212 
 213 dnl
 214 define(`VLoadStore', `
 215 // ifelse(load, $3, Load, Store) Vector ($6 bits)
     // Matches only when memory_size() is exactly $4 bytes; uses a named
     // encoding class instead of an inline encoding block.
 216 instruct $3V$4_vreg`'(vReg $7, vmem$4 mem)
 217 %{
 218   predicate(UseSVE > 0 && `n->as_'ifelse(load, $3, Load, Store)Vector()->memory_size() == $4);
 219   match(Set ifelse(load, $3, dst (LoadVector mem), mem (StoreVector mem src)));
 220   ins_cost(4 * INSN_COST);
 221   format %{ "$1   ifelse(load, $3, `$dst,$mem', `$mem,$src')\t# vector ($6 bits)" %}
 222   ins_encode( `aarch64_enc_'ifelse(load, $3, ldr, str)v$2($7, mem) );
 223   ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64));
 224 %}')dnl
 225 dnl        $1    $2 $3     $4  $5 $6   $7  // $5 (D or X) is not referenced by the macro body
 226 VLoadStore(ldrh, H, load,  2,  D, 16,  dst)
 227 VLoadStore(strh, H, store, 2,  D, 16,  src)
 228 VLoadStore(ldrs, S, load,  4,  D, 32,  dst)
 229 VLoadStore(strs, S, store, 4,  D, 32,  src)
 230 VLoadStore(ldrd, D, load,  8,  D, 64,  dst)
 231 VLoadStore(strd, D, store, 8,  D, 64,  src)
 232 VLoadStore(ldrq, Q, load, 16,  X, 128, dst)
 233 VLoadStore(strq, Q, store, 16, X, 128, src)
 234 
 235 // Predicated vector load/store, based on the vector length of the node.
 236 // Only load/store values in the range of the memory_size. This is needed
 237 // when the memory_size is lower than the hardware supported max vector size.
 238 // This might happen for Vector API mask vector load/store.
     // loadV_partial handles a LoadVector larger than 16 bytes but smaller than
     // MaxVectorSize; pgtmp is a temporary predicate and cr is declared killed.
 239 instruct loadV_partial(vReg dst, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{
 240   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 &&
 241             n->as_LoadVector()->memory_size() < MaxVectorSize);
 242   match(Set dst (LoadVector mem));
 243   effect(TEMP pgtmp, KILL cr);
 244   ins_cost(6 * SVE_COST);
 245   format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t"
 246             "sve_ldr $dst, $pgtmp, $mem\t# load vector partial" %}
 247   ins_encode %{
 248     BasicType bt = Matcher::vector_element_basic_type(this);
         // Build the governing predicate from the element count of this node.
 249     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),
 250                           Matcher::vector_length(this));
 251     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
         // Load under pgtmp; memory and vector element types are identical.
 252     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg,
 253                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
 254                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 255   %}
 256   ins_pipe(pipe_slow);
 257 %}
 258 
 259 instruct storeV_partial(vReg src, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{
       // StoreVector larger than 16 bytes but smaller than MaxVectorSize.
       // pgtmp is a temporary predicate and cr is declared killed.
 260   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 &&
 261             n->as_StoreVector()->memory_size() < MaxVectorSize);
 262   match(Set mem (StoreVector mem src));
 263   effect(TEMP pgtmp, KILL cr);
 264   ins_cost(5 * SVE_COST);
 265   format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t"
 266             "sve_str $src, $pgtmp, $mem\t# store vector partial" %}
 267   ins_encode %{
 268     BasicType bt = Matcher::vector_element_basic_type(this, $src);
         // Build the governing predicate from the element count of the src operand.
 269     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),
 270                           Matcher::vector_length(this, $src));
 271     FloatRegister src_reg = as_FloatRegister($src$$reg);
         // Store under pgtmp; memory and vector element types are identical.
 272     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg,
 273                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
 274                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 275   %}
 276   ins_pipe(pipe_slow);
 277 %}
 278 
 279 // vector load/store - predicated
 280 
 281 instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{
       // LoadVectorMasked whose memory size equals MaxVectorSize.
       // The mask operand pg is used directly as the governing predicate.
 282   predicate(UseSVE > 0 &&
 283             n->as_LoadVector()->memory_size() == MaxVectorSize);
 284   match(Set dst (LoadVectorMasked mem pg));
 285   ins_cost(4 * SVE_COST);
 286   format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated (sve)" %}
 287   ins_encode %{
 288     BasicType bt = Matcher::vector_element_basic_type(this);
 289     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg),
 290                           as_PRegister($pg$$reg), bt, bt, $mem->opcode(),
 291                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 292   %}
 293   ins_pipe(pipe_slow);
 294 %}
 295 
 296 instruct loadV_masked_partial(vReg dst, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{
       // LoadVectorMasked whose memory size is below MaxVectorSize.
       // pgtmp is a temporary predicate and cr is declared killed.
 297   predicate(UseSVE > 0 &&
 298             n->as_LoadVector()->memory_size() < MaxVectorSize);
 299   match(Set dst (LoadVectorMasked mem pg));
 300   effect(TEMP pgtmp, KILL cr);
 301   ins_cost(6 * SVE_COST);
 302   format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated partial (sve)" %}
 303   ins_encode %{
 304     BasicType bt = Matcher::vector_element_basic_type(this);
         // Build a predicate from the element count of this node ...
 305     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),
 306                           Matcher::vector_length(this));
         // ... and combine it with the mask pg through sve_and before loading under pgtmp.
 307     __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg),
 308                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
 309     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg),
 310                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
 311                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 312   %}
 313   ins_pipe(pipe_slow);
 314 %}
 315 
 316 instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{
       // StoreVectorMasked whose memory size equals MaxVectorSize.
       // The mask operand pg is used directly as the governing predicate.
 317   predicate(UseSVE > 0 &&
 318             n->as_StoreVector()->memory_size() == MaxVectorSize);
 319   match(Set mem (StoreVectorMasked mem (Binary src pg)));
 320   ins_cost(4 * SVE_COST);
 321   format %{ "sve_str $mem, $pg, $src\t# store vector predicated (sve)" %}
 322   ins_encode %{
 323     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 324     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg),
 325                           as_PRegister($pg$$reg), bt, bt, $mem->opcode(),
 326                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 327   %}
 328   ins_pipe(pipe_slow);
 329 %}
 330 
 331 instruct storeV_masked_partial(vReg src, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{
       // StoreVectorMasked whose memory size is below MaxVectorSize.
       // pgtmp is a temporary predicate and cr is declared killed.
 332   predicate(UseSVE > 0 &&
 333             n->as_StoreVector()->memory_size() < MaxVectorSize);
 334   match(Set mem (StoreVectorMasked mem (Binary src pg)));
 335   effect(TEMP pgtmp, KILL cr);
 336   ins_cost(6 * SVE_COST);
 337   format %{ "sve_str $mem, $pg, $src\t# store vector predicated partial (sve)" %}
 338   ins_encode %{
 339     BasicType bt = Matcher::vector_element_basic_type(this, $src);
         // Build a predicate from the element count of the src operand ...
 340     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),
 341                           Matcher::vector_length(this, $src));
         // ... and combine it with the mask pg through sve_and before storing under pgtmp.
 342     __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg),
 343                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
 344     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg),
 345                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
 346                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 347   %}
 348   ins_pipe(pipe_slow);
 349 %}
 350 
 351 dnl
 352 dnl MASKALL_IMM($1,   $2  )
 353 dnl MASKALL_IMM(type, size)
 354 define(`MASKALL_IMM', `
     // MaskAll of a constant: 0 produces an all-false predicate, -1 an all-true
     // predicate for the element type of the node; other constants hit the assert.
 355 instruct vmaskAll_imm$1(pRegGov dst, imm$1 src) %{
 356   predicate(UseSVE > 0);
 357   match(Set dst (MaskAll src));
 358   ins_cost(SVE_COST);
 359   format %{ "sve_ptrue/sve_pfalse $dst\t# mask all (sve) ($2)" %}
 360   ins_encode %{
 361     ifelse($1, `I', int, long) con = (ifelse($1, `I', int, long))$src$$constant;
 362     if (con == 0) {
 363       __ sve_pfalse(as_PRegister($dst$$reg));
 364     } else {
 365       assert(con == -1, "invalid constant value for mask");
 366       BasicType bt = Matcher::vector_element_basic_type(this);
 367       __ sve_ptrue(as_PRegister($dst$$reg), __ elemType_to_regVariant(bt));
 368     }
 369   %}
 370   ins_pipe(pipe_slow);
 371 %}')dnl
 372 dnl
 373 dnl MASKALL($1,   $2  )
 374 dnl MASKALL(type, size)
 375 define(`MASKALL', `
     // MaskAll of a register value: broadcast src into tmp, then compare every
     // lane against 0 with NE under ptrue to build the predicate dst.
 376 instruct vmaskAll$1(pRegGov dst, ifelse($1, `I', iRegIorL2I, iRegL) src, vReg tmp, rFlagsReg cr) %{
 377   predicate(UseSVE > 0);
 378   match(Set dst (MaskAll src));
 379   effect(TEMP tmp, KILL cr);
 380   ins_cost(2 * SVE_COST);
 381   format %{ "sve_dup $tmp, $src\n\t"
 382             "sve_cmpne $dst, $tmp, 0\t# mask all (sve) ($2)" %}
 383   ins_encode %{
 384     BasicType bt = Matcher::vector_element_basic_type(this);
 385     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 386     __ sve_dup(as_FloatRegister($tmp$$reg), size, as_Register($src$$reg));
 387     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0);
 388   %}
 389   ins_pipe(pipe_slow);
 390 %}')dnl
 391 dnl
 392 // maskAll: constant and register variants for int and for long inputs
 393 MASKALL_IMM(I, B/H/S)
 394 MASKALL(I, B/H/S)
 395 MASKALL_IMM(L, D)
 396 MASKALL(L, D)
 397 
 398 dnl
 399 dnl MASK_LOGICAL_OP($1,        $2,      $3  )
 400 dnl MASK_LOGICAL_OP(insn_name, op_name, insn)
 401 define(`MASK_LOGICAL_OP', `
     // $2 on two predicate registers, emitted as $3 with ptrue as the governing predicate.
 402 instruct vmask_$1(pRegGov pd, pRegGov pn, pRegGov pm) %{
 403   predicate(UseSVE > 0);
 404   match(Set pd ($2 pn pm));
 405   ins_cost(SVE_COST);
 406   format %{ "$3 $pd, $pn, $pm\t# predicate (sve)" %}
 407   ins_encode %{
 408     __ $3(as_PRegister($pd$$reg), ptrue,
 409                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
 410   %}
 411   ins_pipe(pipe_slow);
 412 %}')dnl
 413 dnl
 414 // mask logical and/or/xor
 415 MASK_LOGICAL_OP(and, AndVMask, sve_and)
 416 MASK_LOGICAL_OP(or, OrVMask, sve_orr)
 417 MASK_LOGICAL_OP(xor, XorVMask, sve_eor)
 418 
 419 dnl
 420 dnl MASK_LOGICAL_AND_NOT($1,   $2  )
 421 dnl MASK_LOGICAL_AND_NOT(type, size)
 422 define(`MASK_LOGICAL_AND_NOT', `
     // AndVMask of pn with (pm XorVMask MaskAll m1) is matched as one sve_bic
     // under ptrue; m1 is the imm$1_M1 constant operand.
 423 instruct vmask_and_not$1(pRegGov pd, pRegGov pn, pRegGov pm, imm$1_M1 m1) %{
 424   predicate(UseSVE > 0);
 425   match(Set pd (AndVMask pn (XorVMask pm (MaskAll m1))));
 426   ins_cost(SVE_COST);
 427   format %{ "sve_bic $pd, $pn, $pm\t# predicate (sve) ($2)" %}
 428   ins_encode %{
 429     __ sve_bic(as_PRegister($pd$$reg), ptrue,
 430                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
 431   %}
 432   ins_pipe(pipe_slow);
 433 %}')dnl
 434 dnl
 435 // mask logical and_not
 436 MASK_LOGICAL_AND_NOT(I, B/H/S)
 437 MASK_LOGICAL_AND_NOT(L, D)
 438 
 439 // vector reinterpret
 440 
 441 instruct reinterpret(vReg dst) %{
       // VectorReinterpret whose input and result have the same byte length:
       // input and result share the dst register and the encoding is empty.
 442   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() ==
 443                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src == dst
 444   match(Set dst (VectorReinterpret dst));
 445   ins_cost(0);
 446   format %{ "# reinterpret $dst\t# do nothing" %}
 447   ins_encode %{
 448     // empty
 449   %}
 450   ins_pipe(pipe_class_empty);
 451 %}
 452 
 453 instruct reinterpretResize(vReg dst, vReg src, pRegGov pgtmp, rFlagsReg cr) %{
       // VectorReinterpret whose input and result differ in byte length.
       // dst is cleared and then receives src for the lanes selected by pgtmp.
 454   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() !=
 455                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src != dst
 456   match(Set dst (VectorReinterpret src));
 457   effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
 458   ins_cost(3 * SVE_COST);
 459   format %{ "reinterpretResize $dst, $src\t# vector (sve)" %}
 460   ins_encode %{
 461     uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
 462     uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
         // The smaller of the two byte lengths.
 463     uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ?
 464                                   length_in_bytes_src : length_in_bytes_dst;
 465     assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
 466            "invalid vector length");
         // NOTE(review): presumably activates the first length_in_bytes_resize byte lanes - confirm.
 467     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B, length_in_bytes_resize);
 468     __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0);
 469     __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pgtmp$$reg),
 470                as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg));
 471   %}
 472   ins_pipe(pipe_slow);
 473 %}
 474 
 475 // vector mask reinterpret
 476 
 477 instruct vmask_reinterpret_same_esize(pRegGov dst_src) %{
       // Mask reinterpret where lane count and byte length are both unchanged:
       // the predicate register is reused and the encoding is empty.
 478   predicate(UseSVE > 0 &&
 479             n->as_Vector()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
 480             n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
 481   match(Set dst_src (VectorReinterpret dst_src));
 482   ins_cost(0);
 483   format %{ "# vmask_reinterpret $dst_src\t# do nothing" %}
 484   ins_encode %{
 485     // empty
 486   %}
 487   ins_pipe(pipe_class_empty);
 488 %}
 489 
 490 instruct vmask_reinterpret_diff_esize(pRegGov dst, pRegGov src, vReg tmp, rFlagsReg cr) %{
       // Mask reinterpret where the byte length is unchanged but the lane count differs.
       // The mask is expanded into the vector tmp at the source element size and
       // turned back into a predicate at the destination element size.
 491   predicate(UseSVE > 0 &&
 492             n->as_Vector()->length() != n->in(1)->bottom_type()->is_vect()->length() &&
 493             n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
 494   match(Set dst (VectorReinterpret src));
 495   effect(TEMP tmp, KILL cr);
 496   ins_cost(2 * SVE_COST);
 497   format %{ "# vmask_reinterpret $dst, $src\t# vector (sve)" %}
 498   ins_encode %{
 499     BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
 500     Assembler::SIMD_RegVariant from_size = __ elemType_to_regVariant(from_bt);
 501     BasicType to_bt = Matcher::vector_element_basic_type(this);
 502     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
         // Copy -1 into tmp under src.
         // NOTE(review): the trailing false presumably selects zeroing of inactive lanes - confirm.
 503     __ sve_cpy(as_FloatRegister($tmp$$reg), from_size, as_PRegister($src$$reg), -1, false);
         // Lanes of tmp that equal -1 at the destination element size become active in dst.
 504     __ sve_cmp(Assembler::EQ, as_PRegister($dst$$reg), to_size, ptrue, as_FloatRegister($tmp$$reg), -1);
 505   %}
 506   ins_pipe(pipe_slow);
 507 %}
 508 dnl
 509 dnl UNARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4  )
 510 dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, insn)
 511 define(`UNARY_OP_TRUE_PREDICATE', `
     // Unmasked $2: applied under ptrue; not selected for predicated vector nodes.
 512 instruct $1(vReg dst, vReg src) %{
 513   predicate(UseSVE > 0 &&
 514             !n->as_Vector()->is_predicated_vector());
 515   match(Set dst ($2 src));
 516   ins_cost(SVE_COST);
 517   format %{ "$4 $dst, $src\t# vector (sve) ($3)" %}
 518   ins_encode %{
 519     __ $4(as_FloatRegister($dst$$reg), __ $3,
 520          ptrue, as_FloatRegister($src$$reg));
 521   %}
 522   ins_pipe(pipe_slow);
 523 %}')dnl
 524 dnl
 525 
 526 // vector abs: integer element types use sve_abs, float and double use sve_fabs
 527 UNARY_OP_TRUE_PREDICATE(vabsB, AbsVB, B, sve_abs)
 528 UNARY_OP_TRUE_PREDICATE(vabsS, AbsVS, H, sve_abs)
 529 UNARY_OP_TRUE_PREDICATE(vabsI, AbsVI, S, sve_abs)
 530 UNARY_OP_TRUE_PREDICATE(vabsL, AbsVL, D, sve_abs)
 531 UNARY_OP_TRUE_PREDICATE(vabsF, AbsVF, S, sve_fabs)
 532 UNARY_OP_TRUE_PREDICATE(vabsD, AbsVD, D, sve_fabs)
 533 
 534 dnl UNARY_OP_PREDICATE($1,        $2,      $3,   $4  )
 535 dnl UNARY_OP_PREDICATE(insn_name, op_name, size, insn)
 536 define(`UNARY_OP_PREDICATE', `
     // Masked $2: input and result share dst_src and pg is the governing predicate.
 537 instruct $1_masked(vReg dst_src, pRegGov pg) %{
 538   predicate(UseSVE > 0);
 539   match(Set dst_src ($2 dst_src pg));
 540   ins_cost(SVE_COST);
 541   format %{ "$4 $dst_src, $pg, $dst_src\t# vector (sve) ($3)" %}
 542   ins_encode %{
 543     __ $4(as_FloatRegister($dst_src$$reg), __ $3,
 544             as_PRegister($pg$$reg),
 545             as_FloatRegister($dst_src$$reg));
 546   %}
 547   ins_pipe(pipe_slow);
 548 %}')dnl
 549 // vector abs - predicated
 550 UNARY_OP_PREDICATE(vabsB, AbsVB, B, sve_abs)
 551 UNARY_OP_PREDICATE(vabsS, AbsVS, H, sve_abs)
 552 UNARY_OP_PREDICATE(vabsI, AbsVI, S, sve_abs)
 553 UNARY_OP_PREDICATE(vabsL, AbsVL, D, sve_abs)
 554 UNARY_OP_PREDICATE(vabsF, AbsVF, S, sve_fabs)
 555 UNARY_OP_PREDICATE(vabsD, AbsVD, D, sve_fabs)
 556 
 557 dnl
 558 dnl BINARY_OP_UNPREDICATE($1,        $2,      $3,   $4,          $5  )
 559 dnl BINARY_OP_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
 560 define(`BINARY_OP_UNPREDICATE', `
     // Unpredicated $2 with separate destination and source registers.
 561 instruct $1(vReg dst, vReg src1, vReg src2) %{
 562   predicate(UseSVE > 0);
 563   match(Set dst ($2 src1 src2));
 564   ins_cost(SVE_COST);
 565   format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
 566   ins_encode %{
 567     __ $5(as_FloatRegister($dst$$reg), __ $3,
 568          as_FloatRegister($src1$$reg),
 569          as_FloatRegister($src2$$reg));
 570   %}
 571   ins_pipe(pipe_slow);
 572 %}')dnl // The fourth parameter (min_vec_len) is not referenced by the macro body.
 573 dnl
 574 dnl
 575 dnl BINARY_OP_PREDICATE($1,        $2,      $3,   $4  )
 576 dnl BINARY_OP_PREDICATE(insn_name, op_name, size, insn)
 577 define(`BINARY_OP_PREDICATE', `
     // Masked $2: dst_src1 is both the first input and the result, pg is the governing predicate.
 578 instruct $1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
 579   predicate(UseSVE > 0);
 580   match(Set dst_src1 ($2 (Binary dst_src1 src2) pg));
 581   ins_cost(SVE_COST);
 582   format %{ "$4 $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) ($3)" %}
 583   ins_encode %{
 584     __ $4(as_FloatRegister($dst_src1$$reg), __ $3,
 585             as_PRegister($pg$$reg),
 586             as_FloatRegister($src2$$reg));
 587   %}
 588   ins_pipe(pipe_slow);
 589 %}')dnl
 590 dnl
 591 // vector add: integer element types use sve_add, float and double use sve_fadd
 592 BINARY_OP_UNPREDICATE(vaddB, AddVB, B, 16, sve_add)
 593 BINARY_OP_UNPREDICATE(vaddS, AddVS, H, 8,  sve_add)
 594 BINARY_OP_UNPREDICATE(vaddI, AddVI, S, 4,  sve_add)
 595 BINARY_OP_UNPREDICATE(vaddL, AddVL, D, 2,  sve_add)
 596 BINARY_OP_UNPREDICATE(vaddF, AddVF, S, 4,  sve_fadd)
 597 BINARY_OP_UNPREDICATE(vaddD, AddVD, D, 2,  sve_fadd)
 598 
 599 // vector add - predicated: the same element types, each rule taking a governing predicate operand
 600 BINARY_OP_PREDICATE(vaddB, AddVB, B, sve_add)
 601 BINARY_OP_PREDICATE(vaddS, AddVS, H, sve_add)
 602 BINARY_OP_PREDICATE(vaddI, AddVI, S, sve_add)
 603 BINARY_OP_PREDICATE(vaddL, AddVL, D, sve_add)
 604 BINARY_OP_PREDICATE(vaddF, AddVF, S, sve_fadd)
 605 BINARY_OP_PREDICATE(vaddD, AddVD, D, sve_fadd)
 606 
 607 dnl
 608 dnl BINARY_OP_UNSIZED($1,        $2,      $3  )
 609 dnl BINARY_OP_UNSIZED(insn_name, op_name, insn)
 610 define(`BINARY_OP_UNSIZED', `
     // Unpredicated $2; the emitted call takes no element size argument.
 611 instruct $1(vReg dst, vReg src1, vReg src2) %{
 612   predicate(UseSVE > 0);
 613   match(Set dst ($2 src1 src2));
 614   ins_cost(SVE_COST);
 615   format %{ "$3  $dst, $src1, $src2\t# vector (sve)" %}
 616   ins_encode %{
 617     __ $3(as_FloatRegister($dst$$reg),
 618          as_FloatRegister($src1$$reg),
 619          as_FloatRegister($src2$$reg));
 620   %}
 621   ins_pipe(pipe_slow);
 622 %}')dnl
 623 dnl
 624 // vector and
 625 BINARY_OP_UNSIZED(vand, AndV, sve_and)
 626 
 627 // vector or
 628 BINARY_OP_UNSIZED(vor, OrV, sve_orr)
 629 
 630 // vector xor
 631 BINARY_OP_UNSIZED(vxor, XorV, sve_eor)
 632 
 633 dnl BINARY_LOGIC_OP_PREDICATE($1,        $2,      $3  )
 634 dnl BINARY_LOGIC_OP_PREDICATE(insn_name, op_name, insn)
 635 define(`BINARY_LOGIC_OP_PREDICATE', `
     // Masked $2: the element size is derived from the node at encoding time and
     // dst_src1 is both the first input and the result.
 636 instruct $1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
 637   predicate(UseSVE > 0);
 638   match(Set dst_src1 ($2 (Binary dst_src1 src2) pg));
 639   ins_cost(SVE_COST);
 640   format %{ "$3 $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %}
 641   ins_encode %{
 642     BasicType bt = Matcher::vector_element_basic_type(this);
 643     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 644     __ $3(as_FloatRegister($dst_src1$$reg), size,
 645           as_PRegister($pg$$reg),
 646           as_FloatRegister($src2$$reg));
 647   %}
 648   ins_pipe(pipe_slow);
 649 %}')dnl
 650 dnl
 651 // vector and - predicated
 652 BINARY_LOGIC_OP_PREDICATE(vand, AndV, sve_and)
 653 
 654 // vector or - predicated
 655 BINARY_LOGIC_OP_PREDICATE(vor, OrV, sve_orr)
 656 
 657 // vector xor - predicated
 658 BINARY_LOGIC_OP_PREDICATE(vxor, XorV, sve_eor)
 659 
 660 // vector not
     // XorV of src with a replicated m1 constant is matched as a single sve_not.
     // The int flavour accepts byte, short and int replication, the long flavour
     // accepts long replication.
 661 dnl
 662 define(`MATCH_RULE', `ifelse($1, I,
 663 `match(Set dst (XorV src (ReplicateB m1)));
 664   match(Set dst (XorV src (ReplicateS m1)));
 665   match(Set dst (XorV src (ReplicateI m1)));',
 666 `match(Set dst (XorV src (ReplicateL m1)));')')dnl
 667 dnl
 668 define(`VECTOR_NOT', `
     // Bitwise not under ptrue; the D register variant is used for every element type.
 669 instruct vnot$1`'(vReg dst, vReg src, imm$1_M1 m1) %{
 670   predicate(UseSVE > 0);
 671   MATCH_RULE($1)
 672   ins_cost(SVE_COST);
 673   format %{ "sve_not $dst, $src\t# vector (sve) $2" %}
 674   ins_encode %{
 675     __ sve_not(as_FloatRegister($dst$$reg), __ D,
 676                ptrue, as_FloatRegister($src$$reg));
 677   %}
 678   ins_pipe(pipe_slow);
 679 %}')dnl
 680 dnl        $1,$2
 681 VECTOR_NOT(I, B/H/S)
 682 VECTOR_NOT(L, D)
 683 undefine(`MATCH_RULE')
 684 dnl // The name given to undefine is quoted: unquoted, m4 expands the helper first and nothing is removed.
 685 // vector and_not
     // AndV of src1 with (src2 XorV replicated m1 constant) is matched as a single sve_bic.
     // The int flavour accepts byte, short and int replication, the long flavour
     // accepts long replication.
 686 dnl
 687 define(`MATCH_RULE', `ifelse($1, I,
 688 `match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
 689   match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
 690   match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
 691 `match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
 692 dnl
 693 define(`VECTOR_AND_NOT', `
     // Bit clear of src2 from src1; the emitted call takes no element size argument.
 694 instruct vand_not$1`'(vReg dst, vReg src1, vReg src2, imm$1_M1 m1) %{
 695   predicate(UseSVE > 0);
 696   MATCH_RULE($1)
 697   ins_cost(SVE_COST);
 698   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) $2" %}
 699   ins_encode %{
 700     __ sve_bic(as_FloatRegister($dst$$reg),
 701                as_FloatRegister($src1$$reg),
 702                as_FloatRegister($src2$$reg));
 703   %}
 704   ins_pipe(pipe_slow);
 705 %}')dnl
 706 dnl            $1,$2
 707 VECTOR_AND_NOT(I, B/H/S)
 708 VECTOR_AND_NOT(L, D)
 709 undefine(`MATCH_RULE')
 710 dnl // The name given to undefine is quoted: unquoted, m4 expands the helper first and nothing is removed.
 711 dnl VDIVF($1,          $2  , $3         )
 712 dnl VDIVF(name_suffix, size, min_vec_len)
 713 define(`VDIVF', `
     // Floating point division under ptrue; dst_src1 is both the dividend and the result.
 714 instruct vdiv$1(vReg dst_src1, vReg src2) %{
 715   predicate(UseSVE > 0);
 716   match(Set dst_src1 (DivV$1 dst_src1 src2));
 717   ins_cost(SVE_COST);
 718   format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
 719   ins_encode %{
 720     __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
 721          ptrue, as_FloatRegister($src2$$reg));
 722   %}
 723   ins_pipe(pipe_slow);
 724 %}')dnl // The third parameter (min_vec_len) is not referenced by the macro body.
 725 dnl
 726 // vector float div
 727 VDIVF(F, S, 4)
 728 VDIVF(D, D, 2)
 729 
 730 // vector float div - predicated
 731 BINARY_OP_PREDICATE(vfdivF, DivVF, S, sve_fdiv)
 732 BINARY_OP_PREDICATE(vfdivD, DivVD, D, sve_fdiv)
 733 
 734 dnl
 735 dnl VMINMAX($1     , $2, $3   , $4  )
 736 dnl VMINMAX(op_name, op, finsn, insn)
 737 define(`VMINMAX', `
     // Element-wise $1 under ptrue: floating point element types use $3,
     // integral element types use $4.
 738 instruct v$1(vReg dst_src1, vReg src2) %{
 739   predicate(UseSVE > 0);
 740   match(Set dst_src1 ($2 dst_src1 src2));
 741   ins_cost(SVE_COST);
 742   format %{ "sve_$1 $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
 743   ins_encode %{
 744     BasicType bt = Matcher::vector_element_basic_type(this);
 745     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 746     if (is_floating_point_type(bt)) {
 747       __ $3(as_FloatRegister($dst_src1$$reg), size,
 748                   ptrue, as_FloatRegister($src2$$reg));
 749     } else {
 750       assert(is_integral_type(bt), "unsupported type");
 751       __ $4(as_FloatRegister($dst_src1$$reg), size,
 752                   ptrue, as_FloatRegister($src2$$reg));
 753     }
 754   %}
 755   ins_pipe(pipe_slow);
 756 %}')dnl
 757 dnl
 758 // vector min/max
 759 VMINMAX(min, MinV, sve_fmin, sve_smin)
 760 VMINMAX(max, MaxV, sve_fmax, sve_smax)
 761 
 762 dnl
 763 dnl VMINMAX_PREDICATE($1     , $2, $3   , $4  )
 764 dnl VMINMAX_PREDICATE(op_name, op, finsn, insn)
 765 define(`VMINMAX_PREDICATE', `
     // Masked element-wise $1 governed by pg: floating point element types use $3,
     // integral element types use $4.
 766 instruct v$1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
 767   predicate(UseSVE > 0);
 768   match(Set dst_src1 ($2 (Binary dst_src1 src2) pg));
 769   ins_cost(SVE_COST);
 770   format %{ "sve_$1 $dst_src1, $pg, $dst_src1, $src2\t# vector (sve)" %}
 771   ins_encode %{
 772     BasicType bt = Matcher::vector_element_basic_type(this);
 773     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 774     if (is_floating_point_type(bt)) {
 775       __ $3(as_FloatRegister($dst_src1$$reg), size,
 776                   as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
 777     } else {
 778       assert(is_integral_type(bt), "unsupported type");
 779       __ $4(as_FloatRegister($dst_src1$$reg), size,
 780                   as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
 781     }
 782   %}
 783   ins_pipe(pipe_slow);
 784 %}')dnl
 785 dnl
 786 // vector min/max - predicated
 787 VMINMAX_PREDICATE(min, MinV, sve_fmin, sve_smin)
 788 VMINMAX_PREDICATE(max, MaxV, sve_fmax, sve_smax)
 789 
 790 dnl
 791 dnl VFMLA($1           $2    $3         )
 792 dnl VFMLA(name_suffix, size, min_vec_len)
 793 define(`VFMLA', `
 794 // dst_src1 = dst_src1 + src2 * src3
 795 instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
 796   predicate(UseFMA && UseSVE > 0);
 797   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
 798   ins_cost(SVE_COST);
 799   format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 800   ins_encode %{
 801     __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
 802          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 803   %}
 804   ins_pipe(pipe_slow);
 805 %}')dnl
 806 dnl
 807 // vector fmla
 808 VFMLA(F, S, 4)
 809 VFMLA(D, D, 2)
 810 
 811 dnl
 812 dnl VFMLA_PREDICATE($1,   $2  )
 813 dnl VFMLA_PREDICATE(type, size)
 814 define(`VFMLA_PREDICATE', `
 815 // dst_src1 = dst_src1 * src2 + src3
 816 instruct vfmla$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
 817   predicate(UseFMA && UseSVE > 0);
 818   match(Set dst_src1 (FmaV$1 (Binary dst_src1 src2) (Binary src3 pg)));
 819   ins_cost(SVE_COST);
 820   format %{ "sve_fmad $dst_src1, $pg, $src2, $src3\t# vector (sve) ($2)" %}
 821   ins_encode %{
 822     __ sve_fmad(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
 823          as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 824   %}
 825   ins_pipe(pipe_slow);
 826 %}')dnl
 827 dnl
 828 // vector fmla - predicated
 829 VFMLA_PREDICATE(F, S)
 830 VFMLA_PREDICATE(D, D)
 831 
 832 dnl
 833 dnl VFMLS($1           $2    $3         )
 834 dnl VFMLS(name_suffix, size, min_vec_len)
 835 define(`VFMLS', `
 836 // dst_src1 = dst_src1 + -src2 * src3
 837 // dst_src1 = dst_src1 + src2 * -src3
 838 instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
 839   predicate(UseFMA && UseSVE > 0);
 840   match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
 841   match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
 842   ins_cost(SVE_COST);
 843   format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 844   ins_encode %{
 845     __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
 846          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 847   %}
 848   ins_pipe(pipe_slow);
 849 %}')dnl
 850 dnl
 851 // vector fmls
 852 VFMLS(F, S, 4)
 853 VFMLS(D, D, 2)
 854 
 855 dnl
 856 dnl VFNMLA($1           $2    $3         )
 857 dnl VFNMLA(name_suffix, size, min_vec_len)
 858 define(`VFNMLA', `
 859 // dst_src1 = -dst_src1 + -src2 * src3
 860 // dst_src1 = -dst_src1 + src2 * -src3
 861 instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
 862   predicate(UseFMA && UseSVE > 0);
 863   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
 864   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
 865   ins_cost(SVE_COST);
 866   format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 867   ins_encode %{
 868     __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
 869          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 870   %}
 871   ins_pipe(pipe_slow);
 872 %}')dnl
 873 dnl
 874 // vector fnmla
 875 VFNMLA(F, S, 4)
 876 VFNMLA(D, D, 2)
 877 
 878 dnl
 879 dnl VFNMLS($1           $2    $3         )
 880 dnl VFNMLS(name_suffix, size, min_vec_len)
 881 define(`VFNMLS', `
 882 // dst_src1 = -dst_src1 + src2 * src3
 883 instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
 884   predicate(UseFMA && UseSVE > 0);
 885   match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
 886   ins_cost(SVE_COST);
 887   format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
 888   ins_encode %{
 889     __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
 890          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 891   %}
 892   ins_pipe(pipe_slow);
 893 %}')dnl
 894 dnl
 895 // vector fnmls
 896 VFNMLS(F, S, 4)
 897 VFNMLS(D, D, 2)
 898 
 899 dnl
 900 dnl VMLA($1           $2    $3         )
 901 dnl VMLA(name_suffix, size, min_vec_len)
 902 define(`VMLA', `
 903 // dst_src1 = dst_src1 + src2 * src3
 904 instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
 905 %{
 906   predicate(UseSVE > 0);
 907   match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
 908   ins_cost(SVE_COST);
 909   format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %}
 910   ins_encode %{
 911     __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
 912       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 913   %}
 914   ins_pipe(pipe_slow);
 915 %}')dnl
 916 dnl
 917 // vector mla
 918 VMLA(B, B, 16)
 919 VMLA(S, H, 8)
 920 VMLA(I, S, 4)
 921 VMLA(L, D, 2)
 922 
 923 dnl
 924 dnl VMLS($1           $2    $3         )
 925 dnl VMLS(name_suffix, size, min_vec_len)
 926 define(`VMLS', `
 927 // dst_src1 = dst_src1 - src2 * src3
 928 instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
 929 %{
 930   predicate(UseSVE > 0);
 931   match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
 932   ins_cost(SVE_COST);
 933   format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %}
 934   ins_encode %{
 935     __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
 936       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 937   %}
 938   ins_pipe(pipe_slow);
 939 %}')dnl
 940 dnl
 941 // vector mls
 942 VMLS(B, B, 16)
 943 VMLS(S, H, 8)
 944 VMLS(I, S, 4)
 945 VMLS(L, D, 2)
 946 
 947 dnl
 948 dnl BINARY_OP_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
 949 dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
 950 define(`BINARY_OP_TRUE_PREDICATE', `
 951 instruct $1(vReg dst_src1, vReg src2) %{
 952   predicate(UseSVE > 0);
 953   match(Set dst_src1 ($2 dst_src1 src2));
 954   ins_cost(SVE_COST);
 955   format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
 956   ins_encode %{
 957     __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
 958          ptrue, as_FloatRegister($src2$$reg));
 959   %}
 960   ins_pipe(pipe_slow);
 961 %}')dnl
 962 dnl
 963 // vector mul
 964 BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul)
 965 BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8,  sve_mul)
 966 BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4,  sve_mul)
 967 BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2,  sve_mul)
 968 BINARY_OP_UNPREDICATE(vmulF, MulVF, S, 4, sve_fmul)
 969 BINARY_OP_UNPREDICATE(vmulD, MulVD, D, 2, sve_fmul)
 970 
 971 // vector mul - predicated
 972 BINARY_OP_PREDICATE(vmulB, MulVB, B, sve_mul)
 973 BINARY_OP_PREDICATE(vmulS, MulVS, H, sve_mul)
 974 BINARY_OP_PREDICATE(vmulI, MulVI, S, sve_mul)
 975 BINARY_OP_PREDICATE(vmulL, MulVL, D, sve_mul)
 976 BINARY_OP_PREDICATE(vmulF, MulVF, S, sve_fmul)
 977 BINARY_OP_PREDICATE(vmulD, MulVD, D, sve_fmul)
 978 
 979 // vector fneg
 980 UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, sve_fneg)
 981 UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, sve_fneg)
 982 
 983 // vector fneg - predicated
 984 UNARY_OP_PREDICATE(vnegF, NegVF, S, sve_fneg)
 985 UNARY_OP_PREDICATE(vnegD, NegVD, D, sve_fneg)
 986 
 987 // popcount vector
 988 
 989 instruct vpopcountI(vReg dst, vReg src) %{
 990   predicate(UseSVE > 0);
 991   match(Set dst (PopCountVI src));
 992   format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
 993   ins_encode %{
 994      __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
 995   %}
 996   ins_pipe(pipe_slow);
 997 %}
 998 
 999 // vector mask compare
1000 
1001 instruct vmaskcmp(pRegGov dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
1002   predicate(UseSVE > 0);
1003   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
1004   effect(KILL cr);
1005   ins_cost(SVE_COST);
1006   format %{ "sve_cmp $dst, $src1, $src2\t# vector mask cmp (sve)" %}
1007   ins_encode %{
1008     BasicType bt = Matcher::vector_element_basic_type(this);
1009     __ sve_compare(as_PRegister($dst$$reg), bt, ptrue, as_FloatRegister($src1$$reg),
1010                    as_FloatRegister($src2$$reg), (int)$cond$$constant);
1011   %}
1012   ins_pipe(pipe_slow);
1013 %}
1014 
// Masked form of the vector mask compare: emits sve_compare of src1 against
// src2 with the pg operand as governing predicate instead of ptrue. The flags
// register is declared killed.
instruct vmaskcmp_masked(pRegGov dst, vReg src1, vReg src2, immI cond, pRegGov pg, rFlagsReg cr) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond pg)));
  effect(KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_cmp $dst, $pg, $src1, $src2\t# vector mask cmp (sve)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    PRegister result = as_PRegister($dst$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    int cond_code = (int)$cond$$constant;
    __ sve_compare(result, elem_bt, mask, lhs, rhs, cond_code);
  %}
  ins_pipe(pipe_slow);
%}
1028 
// vector blend

// Matches VectorBlend and encodes it as sve_sel governed by pg. The register
// variant follows the element type of this node.
instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorBlend (Binary src1 src2) pg));
  ins_cost(SVE_COST);
  format %{ "sve_sel $dst, $pg, $src2, $src1\t# vector blend (sve)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    FloatRegister result = as_FloatRegister($dst$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister first = as_FloatRegister($src1$$reg);
    FloatRegister second = as_FloatRegister($src2$$reg);
    // operand order matters: src2 is handed over ahead of src1
    __ sve_sel(result, variant, mask, second, first);
  %}
  ins_pipe(pipe_slow);
%}
1044 
// vector load mask

// VectorLoadMask for byte elements: the predicate dst is produced by a
// not-equal compare of src against the immediate zero, under ptrue. The flags
// register is declared killed.
instruct vloadmaskB(pRegGov dst, vReg src, rFlagsReg cr) %{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(KILL cr);
  ins_cost(SVE_COST);
  format %{ "vloadmaskB $dst, $src\t# vector load mask (sve) (B)" %}
  ins_encode %{
    PRegister result = as_PRegister($dst$$reg);
    FloatRegister bytes = as_FloatRegister($src$$reg);
    __ sve_cmp(Assembler::NE, result, __ B, ptrue, bytes, 0);
  %}
  ins_pipe(pipe_slow);
%}
1060 
// VectorLoadMask for element types other than byte: src is first extended
// from the B variant to the variant of the element type into tmp, and tmp is
// then compared not-equal against zero under ptrue to produce dst.
instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() != T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP tmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "vloadmask $dst, $src\t# vector load mask (sve) (H/S/D)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    PRegister result = as_PRegister($dst$$reg);
    FloatRegister bytes = as_FloatRegister($src$$reg);
    FloatRegister widened = as_FloatRegister($tmp$$reg);
    __ sve_vector_extend(widened, variant, bytes, __ B);
    __ sve_cmp(Assembler::NE, result, variant, ptrue, widened, 0);
  %}
  ins_pipe(pipe_slow);
%}
1075 
// vector store mask

// VectorStoreMask with an element size operand of 1 (immI_1): a single
// sve_cpy with the B variant, governed by the src predicate, writes the
// immediate 1 into dst.
instruct vstoremaskB(vReg dst, pRegGov src, immI_1 size) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorStoreMask src size));
  ins_cost(SVE_COST);
  format %{ "vstoremask $dst, $src\t# vector store mask (sve) (B)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    PRegister mask = as_PRegister($src$$reg);
    __ sve_cpy(result, __ B, mask, 1, false);
  %}
  ins_pipe(pipe_slow);
%}
1088 
// VectorStoreMask with an element size operand above 1 (immI_gt_1): sve_cpy
// writes the immediate 1 into dst using the variant for that element size,
// then dst is narrowed in place to the B variant with tmp as scratch register.
instruct vstoremask_narrow(vReg dst, pRegGov src, vReg tmp, immI_gt_1 size) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "vstoremask $dst, $src\t# vector store mask (sve) (H/S/D)" %}
  ins_encode %{
    Assembler::SIMD_RegVariant wide = __ elemBytes_to_regVariant((int)$size$$constant);
    FloatRegister result = as_FloatRegister($dst$$reg);
    PRegister mask = as_PRegister($src$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_cpy(result, wide, mask, 1, false);
    __ sve_vector_narrow(result, __ B, result, wide, scratch);
  %}
  ins_pipe(pipe_slow);
%}
1103 
// Combine LoadVector+VectorLoadMask when the vector element type is not T_BYTE

// Applies when the vector spans MaxVectorSize bytes and its elements are wider
// than one byte. The memory operand is loaded through loadStoreA_predicated
// under ptrue and the loaded vector is compared not-equal against zero.
instruct vloadmask_loadV(pRegGov dst, indirect mem, vReg tmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_Vector()->length_in_bytes() == MaxVectorSize &&
            type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
  match(Set dst (VectorLoadMask (LoadVector mem)));
  effect(TEMP tmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_ld1b $tmp, $mem\n\t"
            "sve_cmpne $dst, $tmp, 0\t# load vector mask (sve) (H/S/D)" %}
  ins_encode %{
    // The mask sits in memory as boolean values. They are loaded extended to
    // the element type of this vector, and the vector is then turned into a
    // predicate.
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(lane_bt);
    PRegister result = as_PRegister($dst$$reg);
    FloatRegister loaded = as_FloatRegister($tmp$$reg);
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, loaded,
                          ptrue, T_BOOLEAN, lane_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ sve_cmp(Assembler::NE, result, variant, ptrue, loaded, 0);
  %}
  ins_pipe(pipe_slow);
%}
1127 
// Partial form of the combined LoadVector+VectorLoadMask rule: applies when
// the vector is longer than 16 bytes but shorter than MaxVectorSize and its
// elements are wider than one byte. The load is governed by ptmp, which is
// set up from the vector length; the final compare runs under ptrue.
instruct vloadmask_loadV_partial(pRegGov dst, indirect mem, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_Vector()->length_in_bytes() > 16 &&
            n->as_Vector()->length_in_bytes() < MaxVectorSize &&
            type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
  match(Set dst (VectorLoadMask (LoadVector mem)));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(6 * SVE_COST);
  format %{ "vloadmask_loadV $dst, $mem\t# load vector mask partial (sve) (H/S/D)" %}
  ins_encode %{
    // Only the valid mask values are loaded. They are boolean in memory and
    // get extended to the element type of this vector before the vector is
    // turned into a predicate.
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(lane_bt);
    PRegister result = as_PRegister($dst$$reg);
    PRegister lanes = as_PRegister($ptmp$$reg);
    FloatRegister loaded = as_FloatRegister($vtmp$$reg);
    __ sve_whilelo_zr_imm(lanes, variant, Matcher::vector_length(this));
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, loaded,
                          lanes, T_BOOLEAN, lane_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ sve_cmp(Assembler::NE, result, variant, ptrue, loaded, 0);
  %}
  ins_pipe(pipe_slow);
%}
1150 
// Combine VectorStoreMask+StoreVector when the vector element type is not T_BYTE

// Applies when the mask vector feeding the store spans MaxVectorSize bytes.
// sve_cpy writes the immediate 1 into tmp under the src predicate, and tmp is
// then stored as boolean values through loadStoreA_predicated under ptrue.
instruct storeV_vstoremask(indirect mem, pRegGov src, vReg tmp, immI_gt_1 esize) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) == MaxVectorSize);
  match(Set mem (StoreVector mem (VectorStoreMask src esize)));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_cpy $tmp, $src, 1\n\t"
            "sve_st1b $tmp, $mem\t# store vector mask (sve) (H/S/D)" %}
  ins_encode %{
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $src);
    int esize_bytes = (int)$esize$$constant;
    // the element size operand has to agree with the element type of src
    assert(type2aelembytes(mask_bt) == esize_bytes, "unsupported type.");
    Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(esize_bytes);
    FloatRegister values = as_FloatRegister($tmp$$reg);
    __ sve_cpy(values, variant, as_PRegister($src$$reg), 1, false);
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, values,
                          ptrue, T_BOOLEAN, mask_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
  ins_pipe(pipe_slow);
%}
1172 
// Partial form of the combined VectorStoreMask+StoreVector rule: applies when
// more than 16 bytes are stored, the stored elements are wider than one byte
// and the mask vector is shorter than MaxVectorSize. The store is governed by
// ptmp, which is set up from the length of src.
instruct storeV_vstoremask_partial(indirect mem, pRegGov src, vReg vtmp,
                                   immI_gt_1 esize, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVector()->memory_size() > 16 &&
            type2aelembytes(n->as_StoreVector()->vect_type()->element_basic_type()) > 1 &&
            Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) < MaxVectorSize);
  match(Set mem (StoreVector mem (VectorStoreMask src esize)));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(6 * SVE_COST);
  format %{ "storeV_vstoremask $src, $mem\t# store vector mask partial (sve) (H/S/D)" %}
  ins_encode %{
    // The src predicate is first turned into a vector; only its valid
    // elements are then written out as boolean values.
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $src);
    Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(mask_bt);
    FloatRegister values = as_FloatRegister($vtmp$$reg);
    PRegister lanes = as_PRegister($ptmp$$reg);
    __ sve_cpy(values, variant, as_PRegister($src$$reg), 1, false);
    __ sve_whilelo_zr_imm(lanes, variant, Matcher::vector_length(this, $src));
    loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, values,
                          lanes, T_BOOLEAN, mask_bt, $mem->opcode(),
                          as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
  ins_pipe(pipe_slow);
%}
1196 dnl
dnl REDUCE_I: emits the int reduction rule reduce_<insn_name>I for vector
dnl operands that span MaxVectorSize bytes. For AddReductionVI the predicate
dnl only checks the operand length; any other op_name additionally requires a
dnl vector element type other than T_LONG.
dnl
dnl REDUCE_I($1,        $2     )
dnl REDUCE_I(insn_name, op_name)
define(`REDUCE_I', `
instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
  ifelse($2, AddReductionVI,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction (sve) (may extend)" %}
  ins_encode %{
    // the element type is read from the vector operand
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    Register result = $dst$$Register;
    Register scalar_in = $src1$$Register;
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), result, elem_bt,
                           scalar_in, vector_in, ptrue, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1219 dnl
1220 dnl
dnl REDUCE_L: emits the long reduction rule reduce_<insn_name>L for vector
dnl operands that span MaxVectorSize bytes. For AddReductionVL the predicate
dnl only checks the operand length; any other op_name additionally requires
dnl the vector element type to be T_LONG.
dnl
dnl REDUCE_L($1,        $2    )
dnl REDUCE_L(insn_name, op_name)
define(`REDUCE_L', `
instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
  ifelse($2, AddReductionVL,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction (sve)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register scalar_in = $src1$$Register;
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), result, T_LONG,
                           scalar_in, vector_in, ptrue, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1242 dnl
dnl REDUCE_I_PARTIAL: emits the int reduction rule reduce_<insn_name>I_partial
dnl for vector operands shorter than MaxVectorSize. The predicate ptmp is set
dnl up from the operand length and then governs the reduction. For op_name
dnl other than AddReductionVI the element type must not be T_LONG.
dnl
dnl REDUCE_I_PARTIAL($1,        $2     )
dnl REDUCE_I_PARTIAL(insn_name, op_name)
define(`REDUCE_I_PARTIAL', `
instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                             pRegGov ptmp, rFlagsReg cr) %{
  ifelse($2, AddReductionVI,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction partial (sve) (may extend)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    Assembler::SIMD_RegVariant lane_size = __ elemType_to_regVariant(elem_bt);
    PRegister lanes = as_PRegister($ptmp$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($vtmp$$reg);
    // ptmp is derived from the operand length before it governs the reduction
    __ sve_whilelo_zr_imm(lanes, lane_size, Matcher::vector_length(this, $src2));
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, elem_bt,
                           $src1$$Register, vector_in, lanes, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1269 dnl
dnl REDUCE_L_PARTIAL: emits the long reduction rule
dnl reduce_<insn_name>L_partial for vector operands shorter than MaxVectorSize.
dnl The predicate ptmp is set up with the D variant from the operand length
dnl and then governs the reduction. For op_name other than AddReductionVL the
dnl element type must be T_LONG.
dnl
dnl REDUCE_L_PARTIAL($1,        $2    )
dnl REDUCE_L_PARTIAL(insn_name, op_name)
define(`REDUCE_L_PARTIAL', `
instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                             pRegGov ptmp, rFlagsReg cr) %{
  ifelse($2, AddReductionVL,
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);')
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction partial (sve)" %}
  ins_encode %{
    PRegister lanes = as_PRegister($ptmp$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($vtmp$$reg);
    // ptmp is derived from the operand length before it governs the reduction
    __ sve_whilelo_zr_imm(lanes, __ D, Matcher::vector_length(this, $src2));
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, vector_in, lanes, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1294 dnl
dnl REDUCE_ADDF: emits the floating-point add reduction rule
dnl reduce_<insn_name> for vector operands that span MaxVectorSize bytes. The
dnl scalar input register is also the destination and sve_fadda is issued
dnl under ptrue.
dnl
dnl REDUCE_ADDF($1,        $2,      $3,      $4  )
dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
define(`REDUCE_ADDF', `
instruct reduce_$1($3 src1_dst, vReg src2) %{
  predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
  match(Set src1_dst ($2 src1_dst src2));
  ins_cost(SVE_COST);
  format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
  ins_encode %{
    // running sum and result share one register
    FloatRegister sum = as_FloatRegister($src1_dst$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    __ sve_fadda(sum, __ $4, ptrue, vector_in);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1310 dnl
1311 dnl
dnl REDUCE_ADDF_PARTIAL: emits the floating-point add reduction rule
dnl reduce_<insn_name>_partial for vector operands shorter than MaxVectorSize.
dnl The predicate ptmp is set up from the operand length and then governs
dnl sve_fadda. The second argument is the ideal op name used in the match rule.
dnl
dnl REDUCE_ADDF_PARTIAL($1,        $2,      $3,      $4  )
dnl REDUCE_ADDF_PARTIAL(insn_name, op_name, reg_dst, size)
define(`REDUCE_ADDF_PARTIAL', `
instruct reduce_$1_partial($3 src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  match(Set src1_dst ($2 src1_dst src2));
  ins_cost(SVE_COST);
  effect(TEMP ptmp, KILL cr);
  format %{ "sve_reduce_$1 $src1_dst, $src1_dst, $src2\t# $1 reduction partial (sve) ($4)" %}
  ins_encode %{
    PRegister lanes = as_PRegister($ptmp$$reg);
    FloatRegister sum = as_FloatRegister($src1_dst$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    // ptmp is derived from the operand length before it governs the add
    __ sve_whilelo_zr_imm(lanes, __ $4, Matcher::vector_length(this, $src2));
    __ sve_fadda(sum, __ $4, lanes, vector_in);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1330 dnl
1331 dnl
dnl REDUCE_I_PREDICATE: emits the masked int reduction rule
dnl reduce_<insn_name>I_masked for vector operands that span MaxVectorSize
dnl bytes. The pg operand governs the reduction. For op_name other than
dnl AddReductionVI the element type must not be T_LONG.
dnl
dnl REDUCE_I_PREDICATE($1,        $2     )
dnl REDUCE_I_PREDICATE(insn_name, op_name)
define(`REDUCE_I_PREDICATE', `
instruct reduce_$1I_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{
  ifelse($2, AddReductionVI,
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);')
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated (sve) (may extend)" %}
  ins_encode %{
    // the element type is read from the vector operand
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    Register result = $dst$$Register;
    Register scalar_in = $src1$$Register;
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), result, elem_bt,
                           scalar_in, vector_in, mask, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1354 dnl
dnl REDUCE_L_PREDICATE: emits the masked long reduction rule
dnl reduce_<insn_name>L_masked for vector operands that span MaxVectorSize
dnl bytes. The pg operand governs the reduction. For op_name other than
dnl AddReductionVL the element type must be T_LONG.
dnl
dnl REDUCE_L_PREDICATE($1,        $2    )
dnl REDUCE_L_PREDICATE(insn_name, op_name)
define(`REDUCE_L_PREDICATE', `
instruct reduce_$1L_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{
  ifelse($2, AddReductionVL,
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);')
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated (sve)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register scalar_in = $src1$$Register;
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), result, T_LONG,
                           scalar_in, vector_in, mask, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1376 dnl
dnl REDUCE_I_PREDICATE_PARTIAL: emits the masked int reduction rule
dnl reduce_<insn_name>I_masked_partial for vector operands shorter than
dnl MaxVectorSize. ptmp is set up from the operand length, combined with pg by
dnl sve_and, and the outcome governs the reduction. For op_name other than
dnl AddReductionVI the element type must not be T_LONG.
dnl
dnl REDUCE_I_PREDICATE_PARTIAL($1,        $2     )
dnl REDUCE_I_PREDICATE_PARTIAL(insn_name, op_name)
define(`REDUCE_I_PREDICATE_PARTIAL', `
instruct reduce_$1I_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                                  pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
  ifelse($2, AddReductionVI,
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);')
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated partial (sve) (may extend)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    Assembler::SIMD_RegVariant lane_size = __ elemType_to_regVariant(elem_bt);
    PRegister lanes = as_PRegister($ptmp$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($vtmp$$reg);
    __ sve_whilelo_zr_imm(lanes, lane_size, Matcher::vector_length(this, $src2));
    // fold the mask operand into the length predicate
    __ sve_and(lanes, lanes, mask, mask);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, elem_bt,
                           $src1$$Register, vector_in, lanes, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1405 dnl
dnl REDUCE_L_PREDICATE_PARTIAL: emits the masked long reduction rule
dnl reduce_<insn_name>L_masked_partial for vector operands shorter than
dnl MaxVectorSize. ptmp is set up with the D variant from the operand length,
dnl combined with pg by sve_and, and the outcome governs the reduction. For
dnl op_name other than AddReductionVL the element type must be T_LONG.
dnl
dnl REDUCE_L_PREDICATE_PARTIAL($1,        $2    )
dnl REDUCE_L_PREDICATE_PARTIAL(insn_name, op_name)
define(`REDUCE_L_PREDICATE_PARTIAL', `
instruct reduce_$1L_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                                  pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
  ifelse($2, AddReductionVL,
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);',
       `predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);')
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated partial (sve)" %}
  ins_encode %{
    PRegister lanes = as_PRegister($ptmp$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($vtmp$$reg);
    __ sve_whilelo_zr_imm(lanes, __ D, Matcher::vector_length(this, $src2));
    // fold the mask operand into the length predicate
    __ sve_and(lanes, lanes, mask, mask);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, vector_in, lanes, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1432 dnl
dnl REDUCE_ADDF_PREDICATE: emits the masked floating-point add reduction rule
dnl reduce_<insn_name>_masked for vector operands that span MaxVectorSize
dnl bytes. The scalar input register is also the destination and sve_fadda is
dnl governed by the pg operand.
dnl
dnl REDUCE_ADDF_PREDICATE($1,        $2,      $3,      $4  )
dnl REDUCE_ADDF_PREDICATE(insn_name, op_name, reg_dst, size)
define(`REDUCE_ADDF_PREDICATE', `
instruct reduce_$1_masked($3 src1_dst, vReg src2, pRegGov pg) %{
  predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
  match(Set src1_dst ($2 (Binary src1_dst src2) pg));
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1 $src1_dst, $pg, $src2\t# $1 reduction predicated (sve)" %}
  ins_encode %{
    // running sum and result share one register
    FloatRegister sum = as_FloatRegister($src1_dst$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    __ sve_fadda(sum, __ $4, mask, vector_in);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1448 dnl
dnl REDUCE_ADDF_PREDICATE_PARTIAL: emits the masked floating-point add
dnl reduction rule reduce_<insn_name>_masked_partial for vector operands
dnl shorter than MaxVectorSize. ptmp is set up from the operand length,
dnl combined with pg by sve_and, and the outcome governs sve_fadda.
dnl
dnl REDUCE_ADDF_PREDICATE_PARTIAL($1,        $2,      $3,      $4  )
dnl REDUCE_ADDF_PREDICATE_PARTIAL(insn_name, op_name, reg_dst, size)
define(`REDUCE_ADDF_PREDICATE_PARTIAL', `
instruct reduce_$1_masked_partial($3 src1_dst, vReg src2, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  match(Set src1_dst ($2 (Binary src1_dst src2) pg));
  effect(TEMP ptmp, KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1 $src1_dst, $pg, $src2\t# $1 reduction predicated partial (sve)" %}
  ins_encode %{
    PRegister lanes = as_PRegister($ptmp$$reg);
    PRegister mask = as_PRegister($pg$$reg);
    FloatRegister sum = as_FloatRegister($src1_dst$$reg);
    FloatRegister vector_in = as_FloatRegister($src2$$reg);
    __ sve_whilelo_zr_imm(lanes, __ $4, Matcher::vector_length(this, $src2));
    // fold the mask operand into the length predicate
    __ sve_and(lanes, lanes, mask, mask);
    __ sve_fadda(sum, __ $4, lanes, vector_in);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1469 dnl
1470 
dnl Instantiations of the reduction templates. In every call the first
dnl argument is the name fragment spliced into the generated rule name and
dnl the second is the ideal node the rule matches. The ADDF templates also
dnl take the scalar register class and the SVE element size.
dnl NOTE(review): the integral and non-predicated ADDF templates are defined
dnl earlier in this file; their argument meaning is inferred from the calls
dnl below - confirm against those definitions.
dnl Each operation is instantiated as: full-width, partial, predicated and
dnl predicated partial.
// vector add reduction
REDUCE_I(add, AddReductionVI)
REDUCE_L(add, AddReductionVL)
REDUCE_ADDF(addF, AddReductionVF, vRegF, S)
REDUCE_ADDF(addD, AddReductionVD, vRegD, D)
REDUCE_I_PARTIAL(add, AddReductionVI)
REDUCE_L_PARTIAL(add, AddReductionVL)
REDUCE_ADDF_PARTIAL(addF, AddReductionVF, vRegF, S)
REDUCE_ADDF_PARTIAL(addD, AddReductionVD, vRegD, D)

// vector add reduction - predicated
REDUCE_I_PREDICATE(add, AddReductionVI)
REDUCE_L_PREDICATE(add, AddReductionVL)
REDUCE_ADDF_PREDICATE(addF, AddReductionVF, vRegF, S)
REDUCE_ADDF_PREDICATE(addD, AddReductionVD, vRegD, D)
REDUCE_I_PREDICATE_PARTIAL(add, AddReductionVI)
REDUCE_L_PREDICATE_PARTIAL(add, AddReductionVL)
REDUCE_ADDF_PREDICATE_PARTIAL(addF, AddReductionVF, vRegF, S)
REDUCE_ADDF_PREDICATE_PARTIAL(addD, AddReductionVD, vRegD, D)

dnl The bitwise reductions share one ideal node per operation for both the
dnl int-family and the long rules.
// vector and reduction
REDUCE_I(and, AndReductionV)
REDUCE_L(and, AndReductionV)
REDUCE_I_PARTIAL(and, AndReductionV)
REDUCE_L_PARTIAL(and, AndReductionV)

// vector and reduction - predicated
REDUCE_I_PREDICATE(and, AndReductionV)
REDUCE_L_PREDICATE(and, AndReductionV)
REDUCE_I_PREDICATE_PARTIAL(and, AndReductionV)
REDUCE_L_PREDICATE_PARTIAL(and, AndReductionV)

// vector or reduction
REDUCE_I(or, OrReductionV)
REDUCE_L(or, OrReductionV)
REDUCE_I_PARTIAL(or, OrReductionV)
REDUCE_L_PARTIAL(or, OrReductionV)

// vector or reduction - predicated
REDUCE_I_PREDICATE(or, OrReductionV)
REDUCE_L_PREDICATE(or, OrReductionV)
REDUCE_I_PREDICATE_PARTIAL(or, OrReductionV)
REDUCE_L_PREDICATE_PARTIAL(or, OrReductionV)

dnl The xor rules use the name fragment eor while matching XorReductionV.
// vector xor reduction
REDUCE_I(eor, XorReductionV)
REDUCE_L(eor, XorReductionV)
REDUCE_I_PARTIAL(eor, XorReductionV)
REDUCE_L_PARTIAL(eor, XorReductionV)

// vector xor reduction - predicated
REDUCE_I_PREDICATE(eor, XorReductionV)
REDUCE_L_PREDICATE(eor, XorReductionV)
REDUCE_I_PREDICATE_PARTIAL(eor, XorReductionV)
REDUCE_L_PREDICATE_PARTIAL(eor, XorReductionV)
1526 
1527 dnl
dnl REDUCE_MAXMIN_I($1,        $2     )
dnl REDUCE_MAXMIN_I(insn_name, op_name)
dnl
dnl Generates reduce_<insn_name>I, the full-width min/max reduction for
dnl integral element types other than long. The element type is read from
dnl src2 at emit time and handed to sve_reduce_integral together with the
dnl all-true predicate ptrue. tmp is a vector TEMP, dst is TEMP_DEF and the
dnl flags register cr is declared killed.
define(`REDUCE_MAXMIN_I', `
instruct reduce_$1I(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(2)) == MaxVectorSize &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    FloatRegister vec = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, elem_bt,
                           $src1$$Register, vec, ptrue, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1548 dnl
dnl REDUCE_MAXMIN_L($1,        $2     )
dnl REDUCE_MAXMIN_L(insn_name, op_name)
dnl
dnl Generates reduce_<insn_name>L, the full-width min/max reduction for
dnl vectors whose element type is T_LONG. sve_reduce_integral is called with
dnl T_LONG and the all-true predicate ptrue. tmp is a vector TEMP, dst is
dnl TEMP_DEF and the flags register cr is declared killed.
define(`REDUCE_MAXMIN_L', `
instruct reduce_$1L(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(2)) == MaxVectorSize &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction (sve)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, vec, ptrue, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1567 dnl
dnl REDUCE_MAXMIN_I_PARTIAL($1     , $2     )
dnl REDUCE_MAXMIN_I_PARTIAL(min_max, op_name)
dnl
dnl Generates reduce_<min_max>I_partial, the min/max reduction for integral
dnl non-long vectors shorter than MaxVectorSize. ptmp is filled by
dnl sve_whilelo_zr_imm from the element count of src2 (presumably enabling
dnl only the lanes of the shorter vector) and then governs
dnl sve_reduce_integral. vtmp and ptmp are TEMPs, dst is TEMP_DEF and the
dnl flags register cr is declared killed.
define(`REDUCE_MAXMIN_I_PARTIAL', `
instruct reduce_$1I_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                             pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(2)) < MaxVectorSize &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $src2\t# $1I reduction partial (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    PRegister lanes = as_PRegister($ptmp$$reg);
    __ sve_whilelo_zr_imm(lanes, __ elemType_to_regVariant(elem_bt),
                          Matcher::vector_length(this, $src2));
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, elem_bt,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           lanes, as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1592 dnl
dnl REDUCE_MAXMIN_L_PARTIAL($1     , $2     )
dnl REDUCE_MAXMIN_L_PARTIAL(min_max, op_name)
dnl
dnl Generates reduce_<min_max>L_partial, the min/max reduction for T_LONG
dnl vectors shorter than MaxVectorSize. ptmp is filled by sve_whilelo_zr_imm
dnl with the D element size and the element count of src2, then governs
dnl sve_reduce_integral. vtmp and ptmp are TEMPs, dst is TEMP_DEF and the
dnl flags register cr is declared killed.
define(`REDUCE_MAXMIN_L_PARTIAL', `
instruct reduce_$1L_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                             pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(2)) < MaxVectorSize &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $src2\t# $1L reduction  partial (sve)" %}
  ins_encode %{
    PRegister lanes = as_PRegister($ptmp$$reg);
    __ sve_whilelo_zr_imm(lanes, __ D, Matcher::vector_length(this, $src2));
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           lanes, as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1614 dnl
dnl REDUCE_MAXMIN_I_PREDICATE($1     , $2     )
dnl REDUCE_MAXMIN_I_PREDICATE(min_max, op_name)
dnl
dnl Generates reduce_<min_max>I_masked, the predicated full-width min/max
dnl reduction for integral element types other than long. The vector input
dnl is reached through the Binary node, hence n->in(1)->in(2) in the
dnl predicate. The operand pg is passed to sve_reduce_integral as its
dnl predicate. tmp is a vector TEMP, dst is TEMP_DEF and the flags register
dnl cr is declared killed.
define(`REDUCE_MAXMIN_I_PREDICATE', `
instruct reduce_$1I_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp,
                           pRegGov pg, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)->in(2)) == MaxVectorSize &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    FloatRegister vec = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    PRegister gov = as_PRegister($pg$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, elem_bt,
                           $src1$$Register, vec, gov, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1636 dnl
dnl REDUCE_MAXMIN_L_PREDICATE($1     , $2     )
dnl REDUCE_MAXMIN_L_PREDICATE(min_max, op_name)
dnl
dnl Generates reduce_<min_max>L_masked, the predicated full-width min/max
dnl reduction for T_LONG vectors. The operand pg is passed to
dnl sve_reduce_integral as its predicate. tmp is a vector TEMP, dst is
dnl TEMP_DEF and the flags register cr is declared killed.
define(`REDUCE_MAXMIN_L_PREDICATE', `
instruct reduce_$1L_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp,
                          pRegGov pg, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)->in(2)) == MaxVectorSize &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated (sve)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src2$$reg);
    FloatRegister scratch = as_FloatRegister($tmp$$reg);
    PRegister gov = as_PRegister($pg$$reg);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, vec, gov, scratch);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1656 dnl
dnl REDUCE_MAXMIN_I_PREDICATE_PARTIAL($1     , $2     )
dnl REDUCE_MAXMIN_I_PREDICATE_PARTIAL(min_max, op_name)
dnl
dnl Generates reduce_<min_max>I_masked_partial, the predicated min/max
dnl reduction for integral non-long vectors shorter than MaxVectorSize.
dnl ptmp is filled by sve_whilelo_zr_imm from the element count of src2,
dnl ANDed with pg, and the combined predicate governs sve_reduce_integral.
dnl vtmp and ptmp are TEMPs, dst is TEMP_DEF and the flags register cr is
dnl declared killed.
define(`REDUCE_MAXMIN_I_PREDICATE_PARTIAL', `
instruct reduce_$1I_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                                  pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)->in(2)) < MaxVectorSize &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
            is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_reduce_$1I $dst, $src1, $pg, $src2\t# $1I reduction predicated partial (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    PRegister lanes = as_PRegister($ptmp$$reg);
    PRegister gov = as_PRegister($pg$$reg);
    __ sve_whilelo_zr_imm(lanes, __ elemType_to_regVariant(elem_bt),
                          Matcher::vector_length(this, $src2));
    __ sve_and(lanes, lanes, gov, gov);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, elem_bt,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           lanes, as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1683 dnl
dnl REDUCE_MAXMIN_L_PREDICATE_PARTIAL($1     , $2     )
dnl REDUCE_MAXMIN_L_PREDICATE_PARTIAL(min_max, op_name)
dnl
dnl Generates reduce_<min_max>L_masked_partial, the predicated min/max
dnl reduction for T_LONG vectors shorter than MaxVectorSize. ptmp is filled
dnl by sve_whilelo_zr_imm with the D element size and the element count of
dnl src2, ANDed with pg, and the combined predicate governs
dnl sve_reduce_integral. vtmp and ptmp are TEMPs, dst is TEMP_DEF and the
dnl flags register cr is declared killed.
define(`REDUCE_MAXMIN_L_PREDICATE_PARTIAL', `
instruct reduce_$1L_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                                  pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)->in(2)) < MaxVectorSize &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst ($2 (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_reduce_$1L $dst, $src1, $pg, $src2\t# $1L reduction predicated partial (sve)" %}
  ins_encode %{
    PRegister lanes = as_PRegister($ptmp$$reg);
    PRegister gov = as_PRegister($pg$$reg);
    __ sve_whilelo_zr_imm(lanes, __ D, Matcher::vector_length(this, $src2));
    __ sve_and(lanes, lanes, gov, gov);
    __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
                           $src1$$Register, as_FloatRegister($src2$$reg),
                           lanes, as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1707 dnl
dnl REDUCE_FMINMAX($1,      $2,          $3,           $4,   $5         )
dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
dnl
dnl Generates reduce_<min_max><name_suffix>, the full-width floating-point
dnl min/max reduction. The matched ideal node name is built by capitalising
dnl the leading m of <min_max> and appending ReductionV. The encoding first
dnl reduces src2 into dst with sve_f<min_max>v under ptrue, then combines dst
dnl with the scalar src1 using f<min_max>s or f<min_max>d, the suffix being
dnl <size> in lower case. dst is TEMP_DEF because it is written before src1
dnl is read.
define(`REDUCE_FMINMAX', `
instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
  predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
            Matcher::vector_length_in_bytes(n->in(2)) == MaxVectorSize);
  match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# $1$2 reduction (sve)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    __ sve_f$1v(result, __ $4, ptrue, as_FloatRegister($src2$$reg));
    __ f`$1'translit($4, `SD', `sd')(result, result, as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1725 dnl
dnl REDUCE_FMINMAX_PARTIAL($1,      $2,          $3,           $4,   $5         )
dnl REDUCE_FMINMAX_PARTIAL(min_max, name_suffix, element_type, size, reg_src_dst)
dnl
dnl Generates reduce_<min_max><name_suffix>_partial, the floating-point
dnl min/max reduction for vectors shorter than MaxVectorSize. ptmp is filled
dnl by sve_whilelo_zr_imm from the element count of src2 and governs
dnl sve_f<min_max>v; the scalar f<min_max>s or f<min_max>d then folds in
dnl src1. dst is TEMP_DEF, ptmp is a TEMP and the flags register cr is
dnl declared killed.
define(`REDUCE_FMINMAX_PARTIAL', `
instruct reduce_$1$2_partial($5 dst, $5 src1, vReg src2,
                             pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
            Matcher::vector_length_in_bytes(n->in(2)) < MaxVectorSize);
  match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
  format %{ "sve_reduce_$1$2 $dst, $src1, $src2\t# $1$2 reduction partial (sve)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    PRegister lanes = as_PRegister($ptmp$$reg);
    __ sve_whilelo_zr_imm(lanes, __ $4, Matcher::vector_length(this, $src2));
    __ sve_f$1v(result, __ $4, lanes, as_FloatRegister($src2$$reg));
    __ f`$1'translit($4, `SD', `sd')(result, result, as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1746 dnl
dnl REDUCE_FMINMAX_PREDICATE($1,      $2,          $3,           $4,   $5         )
dnl REDUCE_FMINMAX_PREDICATE(min_max, name_suffix, element_type, size, reg_src_dst)
dnl
dnl Generates reduce_<min_max><name_suffix>_masked, the predicated full-width
dnl floating-point min/max reduction. sve_f<min_max>v reduces src2 into dst
dnl under the predicate operand pg, then the scalar f<min_max>s or
dnl f<min_max>d combines dst with src1.
dnl
dnl dst is written by the vector reduction before src1 is read by the scalar
dnl instruction, so the two must never share a register. TEMP_DEF dst tells
dnl the register allocator exactly that, as the other REDUCE_FMINMAX* rules
dnl in this file already do.
define(`REDUCE_FMINMAX_PREDICATE', `
instruct reduce_$1$2_masked($5 dst, $5 src1, vReg src2, pRegGov pg) %{
  predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
  match(Set dst (translit($1, `m', `M')ReductionV (Binary src1 src2) pg));
  ins_cost(SVE_COST);
  effect(TEMP_DEF dst);
  format %{ "sve_reduce_$1$2 $dst, $src1, $pg, $src2\t# $1$2 reduction predicated (sve)" %}
  ins_encode %{
    __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
    __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1763 dnl
dnl REDUCE_FMINMAX_PREDICATE_PARTIAL($1,      $2,          $3,           $4,   $5         )
dnl REDUCE_FMINMAX_PREDICATE_PARTIAL(min_max, name_suffix, element_type, size, reg_src_dst)
dnl
dnl Generates reduce_<min_max><name_suffix>_masked_partial, the predicated
dnl floating-point min/max reduction for vectors shorter than MaxVectorSize.
dnl ptmp is filled by sve_whilelo_zr_imm from the element count of src2,
dnl ANDed with pg, and the combined predicate governs sve_f<min_max>v; the
dnl scalar f<min_max>s or f<min_max>d then folds in src1. dst is TEMP_DEF,
dnl ptmp is a TEMP and the flags register cr is declared killed.
define(`REDUCE_FMINMAX_PREDICATE_PARTIAL', `
instruct reduce_$1$2_masked_partial($5 dst, $5 src1, vReg src2, pRegGov pg,
                                    pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
            Matcher::vector_length_in_bytes(n->in(1)->in(2)) < MaxVectorSize);
  match(Set dst (translit($1, `m', `M')ReductionV (Binary src1 src2) pg));
  effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "sve_reduce_$1$2 $dst, $src1, $pg, $src2\t# $1$2 reduction predicated partial (sve)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    PRegister lanes = as_PRegister($ptmp$$reg);
    PRegister gov = as_PRegister($pg$$reg);
    __ sve_whilelo_zr_imm(lanes, __ $4, Matcher::vector_length(this, $src2));
    __ sve_and(lanes, lanes, gov, gov);
    __ sve_f$1v(result, __ $4, lanes, as_FloatRegister($src2$$reg));
    __ f`$1'translit($4, `SD', `sd')(result, result, as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl Instantiations of the min/max reduction templates defined above.
dnl Integral templates take (name fragment, ideal node). Floating-point
dnl templates take (min_max, name_suffix, element_type, size, reg_src_dst)
dnl and derive the ideal node name from min_max themselves.
// vector max reduction
REDUCE_MAXMIN_I(max, MaxReductionV)
REDUCE_MAXMIN_L(max, MaxReductionV)
REDUCE_MAXMIN_I_PARTIAL(max, MaxReductionV)
REDUCE_MAXMIN_L_PARTIAL(max, MaxReductionV)
REDUCE_FMINMAX(max, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX_PARTIAL(max, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
REDUCE_FMINMAX_PARTIAL(max, D, T_DOUBLE, D, vRegD)

// vector max reduction - predicated
REDUCE_MAXMIN_I_PREDICATE(max, MaxReductionV)
REDUCE_MAXMIN_L_PREDICATE(max, MaxReductionV)
REDUCE_MAXMIN_I_PREDICATE_PARTIAL(max, MaxReductionV)
REDUCE_MAXMIN_L_PREDICATE_PARTIAL(max, MaxReductionV)
REDUCE_FMINMAX_PREDICATE(max, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX_PREDICATE(max, D, T_DOUBLE, D, vRegD)
REDUCE_FMINMAX_PREDICATE_PARTIAL(max, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX_PREDICATE_PARTIAL(max, D, T_DOUBLE, D, vRegD)

dnl The min rules mirror the max rules one for one.
// vector min reduction
REDUCE_MAXMIN_I(min, MinReductionV)
REDUCE_MAXMIN_L(min, MinReductionV)
REDUCE_MAXMIN_I_PARTIAL(min, MinReductionV)
REDUCE_MAXMIN_L_PARTIAL(min, MinReductionV)
REDUCE_FMINMAX(min, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX_PARTIAL(min, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
REDUCE_FMINMAX_PARTIAL(min, D, T_DOUBLE, D, vRegD)

// vector min reduction - predicated
REDUCE_MAXMIN_I_PREDICATE(min, MinReductionV)
REDUCE_MAXMIN_L_PREDICATE(min, MinReductionV)
REDUCE_MAXMIN_I_PREDICATE_PARTIAL(min, MinReductionV)
REDUCE_MAXMIN_L_PREDICATE_PARTIAL(min, MinReductionV)
REDUCE_FMINMAX_PREDICATE(min, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX_PREDICATE(min, D, T_DOUBLE, D, vRegD)
REDUCE_FMINMAX_PREDICATE_PARTIAL(min, F, T_FLOAT,  S, vRegF)
REDUCE_FMINMAX_PREDICATE_PARTIAL(min, D, T_DOUBLE, D, vRegD)
1826 
1827 // vector Math.rint, floor, ceil
1828 
// Rounds every double lane of src into dst. The constant rmode operand
// picks the instruction: rint uses sve_frintn, floor uses sve_frintm and
// ceil uses sve_frintp, each under the all-true predicate ptrue. Nothing
// is emitted for any other rmode value.
instruct vroundD(vReg dst, vReg src, immI rmode) %{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint:
        __ sve_frintn(dst_reg, __ D, ptrue, src_reg);
        break;
      case RoundDoubleModeNode::rmode_floor:
        __ sve_frintm(dst_reg, __ D, ptrue, src_reg);
        break;
      case RoundDoubleModeNode::rmode_ceil:
        __ sve_frintp(dst_reg, __ D, ptrue, src_reg);
        break;
    }
  %}
  ins_pipe(pipe_slow);
%}
1852 dnl
dnl REPLICATE($1,        $2,      $3,      $4,   $5         )
dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
dnl
dnl Generates the rule <insn_name>, which matches <op_name> applied to a
dnl general-purpose register of class <reg_src> and emits sve_dup with
dnl element size <size>. The fifth argument min_vec_len is accepted but not
dnl referenced by the template body.
define(`REPLICATE', `
instruct $1(vReg dst, $3 src) %{
  predicate(UseSVE > 0);
  match(Set dst ($2 src));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $src\t# vector (sve) ($4)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($dst$$reg);
    Register scalar = as_Register($src$$reg);
    __ sve_dup(vec, __ $4, scalar);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1866 dnl
dnl REPLICATE_IMM8($1,        $2,      $3,       $4,   $5         )
dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
dnl
dnl Generates the rule <insn_name>, which matches <op_name> applied to a
dnl constant operand of class <imm_type> and emits sve_dup with that constant
dnl and element size <size>. The fifth argument min_vec_len is accepted but
dnl not referenced by the template body.
define(`REPLICATE_IMM8', `
instruct $1(vReg dst, $3 con) %{
  predicate(UseSVE > 0);
  match(Set dst ($2 con));
  ins_cost(SVE_COST);
  format %{ "sve_dup  $dst, $con\t# vector (sve) ($4)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($dst$$reg);
    __ sve_dup(vec, __ $4, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1880 dnl
dnl FREPLICATE($1,        $2,      $3,        $4)
dnl FREPLICATE(insn_name, op_name, reg_src, size)
dnl
dnl Generates the rule <insn_name>, which matches <op_name> applied to a
dnl floating-point register of class <reg_src> and emits sve_cpy from that
dnl register under the all-true predicate ptrue with element size <size>.
define(`FREPLICATE', `
instruct $1(vReg dst, $3 src) %{
  predicate(UseSVE > 0);
  match(Set dst ($2 src));
  ins_cost(SVE_COST);
  format %{ "sve_cpy  $dst, $src\t# vector (sve) ($4)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($dst$$reg);
    FloatRegister scalar = as_FloatRegister($src$$reg);
    __ sve_cpy(vec, __ $4, ptrue, scalar);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1895 
dnl Instantiations of the replicate templates defined above. The trailing
dnl number is the min_vec_len argument, which none of the three template
dnl bodies reference. FREPLICATE documents only four parameters, so the
dnl fifth argument passed to it below is simply ignored by m4.
// vector replicate
REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
REPLICATE(replicateL, ReplicateL, iRegL,      D, 2)
REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8,        B, 16)
REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
1907 dnl
dnl VSHIFT_TRUE_PREDICATE($1,        $2,      $3,   $4,          $5  )
dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
dnl
dnl Generates the rule <insn_name> for a vector shift by a vector of shift
dnl counts. dst is both the value being shifted and the result. The
dnl assembler routine <insn> is called with element size <size> under the
dnl all-true predicate ptrue. The argument min_vec_len is accepted but not
dnl referenced by the template body.
define(`VSHIFT_TRUE_PREDICATE', `
instruct $1(vReg dst, vReg shift) %{
  predicate(UseSVE > 0);
  match(Set dst ($2 dst shift));
  ins_cost(SVE_COST);
  format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
  ins_encode %{
    FloatRegister value = as_FloatRegister($dst$$reg);
    FloatRegister counts = as_FloatRegister($shift$$reg);
    __ $5(value, __ $3, ptrue, counts);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1922 dnl
dnl VSHIFT_IMM_UNPREDICATED($1,        $2,      $3,       $4,   $5,          $6  )
dnl VSHIFT_IMM_UNPREDICATED(insn_name, op_name, op_name2, size, min_vec_len, insn)
dnl
dnl Generates the rule <insn_name> for a vector shift by an immediate count.
dnl op_name is the shift node and op_name2 the shift-count node wrapping the
dnl constant. min_vec_len is accepted but not referenced by the body.
dnl
dnl The emitted C++ depends on the rule name prefix and the element size,
dnl decided at m4 expansion time:
dnl   - name starts with vasr or vlsr: a zero count emits sve_orr of src with
dnl     itself into dst (a plain copy) and returns.
dnl   - name starts with vasr and size is B or H: counts of 8 or more (B) or
dnl     16 or more (H) are clamped to 7 or 15.
dnl   - name starts with vlsl or vlsr and size is B or H: counts of 8 or more
dnl     (B) or 16 or more (H) emit sve_eor of src with itself into dst (all
dnl     zero bits) and return.
dnl Otherwise <insn> is emitted with the (possibly clamped) count.
dnl The trailing dnl after the first statement and after each ifelse keeps
dnl the unused alternatives from leaving blank lines in the output.
define(`VSHIFT_IMM_UNPREDICATED', `
instruct $1(vReg dst, vReg src, immI shift) %{
  predicate(UseSVE > 0);
  match(Set dst ($2 src ($3 shift)));
  ins_cost(SVE_COST);
  format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
  ins_encode %{
    int con = (int)$shift$$constant;dnl
ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }')dnl
ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
    if (con >= 8) con = 7;')ifelse(eval(index(`$4', `H') == 0), 1, `
    if (con >= 16) con = 15;')')dnl
ifelse(eval(index(`$1', `vlsl') == 0  || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
    if (con >= 8) {
      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }')ifelse(eval(index(`$4', `H') == 0), 1, `
    if (con >= 16) {
      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }')')
    __ $6(as_FloatRegister($dst$$reg), __ $4,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1958 dnl
dnl VSHIFT_COUNT($1,        $2,   $3,          $4  )
dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
dnl
dnl Generates the rule <insn_name>, which turns a scalar shift count into a
dnl vector of counts. It matches both LShiftCntV and RShiftCntV and emits
dnl sve_dup of the count register with element size <size>. The element type
dnl check is delegated to ELEMENT_SHORT_CHAR, defined elsewhere in this
dnl file. The argument min_vec_len is accepted but not referenced.
define(`VSHIFT_COUNT', `
instruct $1(vReg dst, iRegIorL2I cnt) %{
  predicate(UseSVE > 0 &&
            ELEMENT_SHORT_CHAR($4, n));
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
  ins_encode %{
    FloatRegister counts = as_FloatRegister($dst$$reg);
    Register scalar = as_Register($cnt$$reg);
    __ sve_dup(counts, __ $2, scalar);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1973 
dnl Instantiations of the shift templates defined above.
dnl   VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
dnl   VSHIFT_IMM_UNPREDICATED(insn_name, op_name, op_name2, size, min_vec_len, insn)
dnl   VSHIFT_COUNT(insn_name, size, min_vec_len, type)
dnl The rule name prefix (vasr, vlsr, vlsl) matters for the immediate forms:
dnl VSHIFT_IMM_UNPREDICATED selects its special cases from it.
// vector shift
VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB,  B, 16, sve_asr)
VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS,  H,  8, sve_asr)
VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI,  S,  4, sve_asr)
VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL,  D,  2, sve_asr)
VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB,  B, 16, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS,  H,  8, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI,  S,  4, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL,  D,  2, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H,  8, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S,  4, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D,  2, sve_lsr)
VSHIFT_IMM_UNPREDICATED(vasrB_imm, RShiftVB,  RShiftCntV, B, 16, sve_asr)
VSHIFT_IMM_UNPREDICATED(vasrS_imm, RShiftVS,  RShiftCntV, H,  8, sve_asr)
VSHIFT_IMM_UNPREDICATED(vasrI_imm, RShiftVI,  RShiftCntV, S,  4, sve_asr)
VSHIFT_IMM_UNPREDICATED(vasrL_imm, RShiftVL,  RShiftCntV, D,  2, sve_asr)
VSHIFT_IMM_UNPREDICATED(vlsrB_imm, URShiftVB, RShiftCntV, B, 16, sve_lsr)
VSHIFT_IMM_UNPREDICATED(vlsrS_imm, URShiftVS, RShiftCntV, H,  8, sve_lsr)
VSHIFT_IMM_UNPREDICATED(vlsrI_imm, URShiftVI, RShiftCntV, S,  4, sve_lsr)
VSHIFT_IMM_UNPREDICATED(vlsrL_imm, URShiftVL, RShiftCntV, D,  2, sve_lsr)
VSHIFT_IMM_UNPREDICATED(vlslB_imm, LShiftVB,  LShiftCntV, B, 16, sve_lsl)
VSHIFT_IMM_UNPREDICATED(vlslS_imm, LShiftVS,  LShiftCntV, H,  8, sve_lsl)
VSHIFT_IMM_UNPREDICATED(vlslI_imm, LShiftVI,  LShiftCntV, S,  4, sve_lsl)
VSHIFT_IMM_UNPREDICATED(vlslL_imm, LShiftVL,  LShiftCntV, D,  2, sve_lsl)
VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
VSHIFT_COUNT(vshiftcntS, H,  8, T_SHORT)
VSHIFT_COUNT(vshiftcntI, S,  4, T_INT)
VSHIFT_COUNT(vshiftcntL, D,  2, T_LONG)

dnl NOTE(review): BINARY_OP_PREDICATE is defined earlier in this file; the
dnl arguments below read as (insn_name, op_name, size, insn) - confirm there.
// vector shift - predicated
BINARY_OP_PREDICATE(vasrB, RShiftVB,  B, sve_asr)
BINARY_OP_PREDICATE(vasrS, RShiftVS,  H, sve_asr)
BINARY_OP_PREDICATE(vasrI, RShiftVI,  S, sve_asr)
BINARY_OP_PREDICATE(vasrL, RShiftVL,  D, sve_asr)
BINARY_OP_PREDICATE(vlslB, LShiftVB,  B, sve_lsl)
BINARY_OP_PREDICATE(vlslS, LShiftVS,  H, sve_lsl)
BINARY_OP_PREDICATE(vlslI, LShiftVI,  S, sve_lsl)
BINARY_OP_PREDICATE(vlslL, LShiftVL,  D, sve_lsl)
BINARY_OP_PREDICATE(vlsrB, URShiftVB, B, sve_lsr)
BINARY_OP_PREDICATE(vlsrS, URShiftVS, H, sve_lsr)
BINARY_OP_PREDICATE(vlsrI, URShiftVI, S, sve_lsr)
BINARY_OP_PREDICATE(vlsrL, URShiftVL, D, sve_lsr)
2017 dnl
dnl VSHIFT_IMM_PREDICATED($1,        $2,      $3,       $4,   $5,   $6  )
dnl VSHIFT_IMM_PREDICATED(insn_name, op_name, op_name2, type, size, insn)
dnl
dnl Generates the rule <insn_name>_imm_masked for a predicated vector shift
dnl by an immediate count. dst_src is both the shifted value and the result;
dnl pg is passed to <insn> as its predicate.
dnl In the body, <type> is used as the SVE element size of the instruction
dnl and <size> as the exclusive upper bound of the shift count in the assert.
dnl The lower bound of the assert is chosen at m4 expansion time: rule names
dnl starting with vlsl allow a zero count, all others require a count above
dnl zero.
define(`VSHIFT_IMM_PREDICATED', `
instruct $1_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
  predicate(UseSVE > 0);
  match(Set dst_src ($2 (Binary dst_src ($3 shift)) pg));
  ins_cost(SVE_COST);
  format %{ "$6 $dst_src, $pg, $dst_src, $shift\t# vector (sve) ($4)" %}
  ins_encode %{
    int con = (int)$shift$$constant;
    assert(con ifelse(index(`$1', `vlsl'), 0, `>=', `>') 0 && con < $5, "invalid shift immediate");
    __ $6(as_FloatRegister($dst_src$$reg), __ $4, as_PRegister($pg$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}')dnl
2033 dnl
dnl Predicated immediate shifts. Arguments follow the VSHIFT_IMM_PREDICATED
dnl header: (insn_name, op_name, op_name2, type, size, insn). The fifth
dnl argument is the element width in bits, used as the upper bound of the
dnl shift count assert.
VSHIFT_IMM_PREDICATED(vasrB, RShiftVB,  RShiftCntV, B, 8,  sve_asr)
VSHIFT_IMM_PREDICATED(vasrS, RShiftVS,  RShiftCntV, H, 16, sve_asr)
VSHIFT_IMM_PREDICATED(vasrI, RShiftVI,  RShiftCntV, S, 32, sve_asr)
VSHIFT_IMM_PREDICATED(vasrL, RShiftVL,  RShiftCntV, D, 64, sve_asr)
VSHIFT_IMM_PREDICATED(vlsrB, URShiftVB, RShiftCntV, B, 8,  sve_lsr)
VSHIFT_IMM_PREDICATED(vlsrS, URShiftVS, RShiftCntV, H, 16, sve_lsr)
VSHIFT_IMM_PREDICATED(vlsrI, URShiftVI, RShiftCntV, S, 32, sve_lsr)
VSHIFT_IMM_PREDICATED(vlsrL, URShiftVL, RShiftCntV, D, 64, sve_lsr)
VSHIFT_IMM_PREDICATED(vlslB, LShiftVB,  LShiftCntV, B, 8,  sve_lsl)
VSHIFT_IMM_PREDICATED(vlslS, LShiftVS,  LShiftCntV, H, 16, sve_lsl)
VSHIFT_IMM_PREDICATED(vlslI, LShiftVI,  LShiftCntV, S, 32, sve_lsl)
VSHIFT_IMM_PREDICATED(vlslL, LShiftVL,  LShiftCntV, D, 64, sve_lsl)

dnl NOTE(review): the UNARY_OP_* and BINARY_OP_* templates used below are
dnl defined earlier in this file; argument meaning is inferred from the
dnl calls (rule name, ideal node, element size, optional min_vec_len,
dnl assembler routine) - confirm against the definitions.
// vector sqrt
UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, sve_fsqrt)
UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, sve_fsqrt)

// vector sqrt - predicated
UNARY_OP_PREDICATE(vsqrtF, SqrtVF, S, sve_fsqrt)
UNARY_OP_PREDICATE(vsqrtD, SqrtVD, D, sve_fsqrt)

// vector sub
BINARY_OP_UNPREDICATE(vsubB, SubVB, B, 16, sve_sub)
BINARY_OP_UNPREDICATE(vsubS, SubVS, H, 8, sve_sub)
BINARY_OP_UNPREDICATE(vsubI, SubVI, S, 4, sve_sub)
BINARY_OP_UNPREDICATE(vsubL, SubVL, D, 2, sve_sub)
BINARY_OP_UNPREDICATE(vsubF, SubVF, S, 4, sve_fsub)
BINARY_OP_UNPREDICATE(vsubD, SubVD, D, 2, sve_fsub)

// vector sub - predicated
BINARY_OP_PREDICATE(vsubB, SubVB, B, sve_sub)
BINARY_OP_PREDICATE(vsubS, SubVS, H, sve_sub)
BINARY_OP_PREDICATE(vsubI, SubVI, S, sve_sub)
BINARY_OP_PREDICATE(vsubL, SubVL, D, sve_sub)
BINARY_OP_PREDICATE(vsubF, SubVF, S, sve_fsub)
BINARY_OP_PREDICATE(vsubD, SubVD, D, sve_fsub)
2070 
2071 // ------------------------------ Vector mask cast --------------------------
2072 
// Mask cast that needs no code: it applies only when the source and the
// result mask have the same lane count and the same size in bytes. The
// operand dst_src is both input and output, the cost is zero and the
// encoding is empty.
instruct vmaskcast(pRegGov dst_src) %{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
            n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
  match(Set dst_src (VectorMaskCast dst_src));
  ins_cost(0);
  format %{ "vmaskcast $dst_src\t# empty (sve)" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(pipe_class_empty);
%}
2085 
// Widening mask cast: applies when the result vector is 2, 4 or 8 times
// as many bytes long as the input vector. The work is delegated to the
// sve_vmaskcast_extend assembler routine, which is handed the result
// size followed by the input size, both in bytes.
instruct vmaskcast_extend(pRegGov dst, pReg src) %{
  predicate(UseSVE > 0 &&
            (Matcher::vector_length_in_bytes(n) == 2 * Matcher::vector_length_in_bytes(n->in(1)) ||
             Matcher::vector_length_in_bytes(n) == 4 * Matcher::vector_length_in_bytes(n->in(1)) ||
             Matcher::vector_length_in_bytes(n) == 8 * Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (VectorMaskCast src));
  ins_cost(SVE_COST * 3);
  format %{ "sve_vmaskcast_extend  $dst, $src\t# extend predicate $src" %}
  ins_encode %{
    PRegister wide_mask = as_PRegister($dst$$reg);
    PRegister narrow_mask = as_PRegister($src$$reg);
    __ sve_vmaskcast_extend(wide_mask, narrow_mask,
                            Matcher::vector_length_in_bytes(this),
                            Matcher::vector_length_in_bytes(this, $src));
  %}
  ins_pipe(pipe_slow);
%}
2101 
// Mask cast to a vector type whose size in bytes is 1/2, 1/4 or 1/8 of the
// size of the source vector type.
instruct vmaskcast_narrow(pRegGov dst, pReg src)
%{
  predicate(UseSVE > 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == Matcher::vector_length_in_bytes(n) * 2 ||
             Matcher::vector_length_in_bytes(n->in(1)) == Matcher::vector_length_in_bytes(n) * 4 ||
             Matcher::vector_length_in_bytes(n->in(1)) == Matcher::vector_length_in_bytes(n) * 8));
  match(Set dst (VectorMaskCast src));
  ins_cost(SVE_COST * 3);
  format %{ "sve_vmaskcast_narrow  $dst, $src\t# narrow predicate $src" %}
  ins_encode %{
    // Both sizes are handed to the macro assembler helper, destination first.
    const uint dst_bytes = Matcher::vector_length_in_bytes(this);
    const uint src_bytes = Matcher::vector_length_in_bytes(this, $src);
    __ sve_vmaskcast_narrow(as_PRegister($dst$$reg), as_PRegister($src$$reg),
                            dst_bytes, src_bytes);
  %}
  ins_pipe(pipe_slow);
%}
2117 dnl
2118 
2119 // ------------------------------ Vector cast -------------------------------
dnl
dnl VECTOR_CAST_X2X($1,        $2,      $3,   $4,   $5         )
dnl VECTOR_CAST_X2X(from_type, to_type, insn, size, format_name)
dnl Emits rule vcvt<from>to<to>. It matches VectorCast<from>2X when the result
dnl element type is <to> and encodes it as one sve_<insn> governed by ptrue,
dnl using the same lane size for the destination and the source.
define(`VECTOR_CAST_X2X', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  ins_cost(SVE_COST);
  format %{ "sve_vectorcast_$5  $dst, $src\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
  %}
  ins_pipe(pipe_slow);
%}')dnl
2135 
2136 dnl
2137 dnl Start of vector cast rules
2138 dnl
// Cast from a byte vector to any other element type. The lanes are first
// extended to the destination lane size; for float/double results the extended
// integers are then converted in place with scvtf.
instruct vcvtBtoX_extend(vReg dst, vReg src)
%{
  predicate(UseSVE > 0);
  match(Set dst (VectorCastB2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_b2x  $dst, $src\t# convert B to X vector (extend)" %}
  ins_encode %{
    const BasicType result_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant result_size = __ elemType_to_regVariant(result_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const bool result_is_fp = (result_bt == T_FLOAT) || (result_bt == T_DOUBLE);

    __ sve_vector_extend(dst_reg, result_size, as_FloatRegister($src$$reg), __ B);
    if (result_is_fp) {
      __ sve_scvtf(dst_reg, result_size, ptrue, dst_reg, result_size);
    }
  %}
  ins_pipe(pipe_slow);
%}
2155 
// Cast from a short vector to a byte vector: a single H to B narrowing step
// that needs one scratch vector register.
instruct vcvtStoB(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  effect(TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_s2b  $dst, $src\t# convert H to B vector" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_vector_narrow(dst_reg, __ B, src_reg, __ H, scratch);
  %}
  ins_pipe(pipe_slow);
%}
2170 
// Cast from a short vector to an element type wider than two bytes. The lanes
// are extended to the destination lane size; for float/double results the
// extended integers are then converted in place with scvtf.
instruct vcvtStoX_extend(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) > 2);
  match(Set dst (VectorCastS2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_s2x  $dst, $src\t# convert H to X vector (extend)" %}
  ins_encode %{
    const BasicType result_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant result_size = __ elemType_to_regVariant(result_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const bool result_is_fp = (result_bt == T_FLOAT) || (result_bt == T_DOUBLE);

    __ sve_vector_extend(dst_reg, result_size, as_FloatRegister($src$$reg), __ H);
    if (result_is_fp) {
      __ sve_scvtf(dst_reg, result_size, ptrue, dst_reg, result_size);
    }
  %}
  ins_pipe(pipe_slow);
%}
2188 
// Cast from an int vector to a byte vector: S to B narrowing with one scratch
// vector register.
instruct vcvtItoB(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastI2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_i2b  $dst, $src\t# convert I to B vector" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_vector_narrow(dst_reg, __ B, src_reg, __ S, scratch);
  %}
  ins_pipe(pipe_slow);
%}
2203 
// Cast from an int vector to a short vector: S to H narrowing with one scratch
// vector register.
instruct vcvtItoS(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (VectorCastI2X src));
  effect(TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_i2s $dst, $src\t# convert I to H vector" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_vector_narrow(dst_reg, __ H, src_reg, __ S, scratch);
  %}
  ins_pipe(pipe_slow);
%}
2218 
// Cast from an int vector to a long vector: S lanes are extended to D lanes.
instruct vcvtItoL(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorCastI2X src));
  ins_cost(SVE_COST);
  format %{ "sve_vectorcast_i2l  $dst, $src\t# convert I to L vector" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sve_vector_extend(dst_reg, __ D, src_reg, __ S);
  %}
  ins_pipe(pipe_slow);
%}
2231 dnl
dnl vcvtItoF: int to float keeps the S lane size, so a single scvtf is enough
VECTOR_CAST_X2X(I, F, scvtf, S, i2f)
2234 
// Cast from an int vector to a double vector: unpack the low half of the S
// lanes into D lanes (sunpklo), then convert those integers in place.
instruct vcvtItoD(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_i2d  $dst, $src\t# convert I to D vector" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    __ sve_sunpklo(dst_reg, __ D, as_FloatRegister($src$$reg));
    __ sve_scvtf(dst_reg, __ D, ptrue, dst_reg, __ D);
  %}
  ins_pipe(pipe_slow);
%}
2248 
// Cast from a long vector to any integral element type: D lanes are narrowed
// to the destination lane size with the help of one scratch vector register.
instruct vcvtLtoX_narrow(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastL2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(2 * SVE_COST);
  format %{ "sve_vectorcast_l2x  $dst, $src\t# convert L to B/H/S vector (narrow)" %}
  ins_encode %{
    const BasicType result_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant result_size = __ elemType_to_regVariant(result_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_vector_narrow(dst_reg, result_size, src_reg, __ D, scratch);
  %}
  ins_pipe(pipe_slow);
%}
2264 
// Cast from a long vector to a float vector: convert each D lane to a float
// with scvtf, then narrow the D-spaced results down to S lanes.
instruct vcvtLtoF(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastL2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_l2f  $dst, $src\t# convert L to F vector" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_scvtf(dst_reg, __ S, ptrue, as_FloatRegister($src$$reg), __ D);
    __ sve_vector_narrow(dst_reg, __ S, dst_reg, __ D, scratch);
  %}
  ins_pipe(pipe_slow);
%}
2281 dnl
dnl vcvtLtoD: long to double keeps the D lane size, so a single scvtf is enough
VECTOR_CAST_X2X(L, D, scvtf, D, l2d)
2284 
// Cast from a float vector to a byte or short vector: convert each S lane to
// an integer with fcvtzs, then narrow the S lanes to the destination size.
instruct vcvtFtoX_narrow(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT));
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_f2x  $dst, $src\t# convert F to B/H vector" %}
  ins_encode %{
    const BasicType result_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant result_size = __ elemType_to_regVariant(result_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_fcvtzs(dst_reg, __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    __ sve_vector_narrow(dst_reg, result_size, dst_reg, __ S, scratch);
  %}
  ins_pipe(pipe_slow);
%}
2303 
// Cast from a float vector to an int or long vector: convert each S lane to
// an integer with fcvtzs; for a long result the S lanes are then extended to
// D lanes in place.
instruct vcvtFtoX_extend(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_LONG));
  match(Set dst (VectorCastF2X src));
  ins_cost(SVE_COST);
  format %{ "sve_vectorcast_f2x  $dst, $src\t# convert F to I/L vector" %}
  ins_encode %{
    const BasicType result_bt = Matcher::vector_element_basic_type(this);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    __ sve_fcvtzs(dst_reg, __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    if (result_bt == T_LONG) {
      __ sve_vector_extend(dst_reg, __ D, dst_reg, __ S);
    }
  %}
  ins_pipe(pipe_slow);
%}
2321 
// Cast from a float vector to a double vector: the S lanes of src are first
// extended into D lanes of dst, then fcvt converts those lanes in place from
// single to double precision.
instruct vcvtFtoD(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  ins_cost(2 * SVE_COST);
  // The second operand printed is the source register (it used to print the
  // destination twice, which hid the real input in the disassembly listing).
  format %{ "sve_vectorcast_f2d  $dst, $src\t# convert F to D vector" %}
  ins_encode %{
    __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S);
    __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
  %}
  ins_pipe(pipe_slow);
%}
2335 
// Cast from a double vector to a byte, short or int vector: convert each D
// lane to an integer with fcvtzs, then narrow the D lanes to the destination
// lane size.
instruct vcvtDtoX_narrow(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorCastD2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_vectorcast_d2x  $dst, $src\t# convert D to X vector (narrow)" %}
  ins_encode %{
    const BasicType result_bt = Matcher::vector_element_basic_type(this);
    const Assembler::SIMD_RegVariant result_size = __ elemType_to_regVariant(result_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister scratch = as_FloatRegister($tmp$$reg);
    __ sve_fcvtzs(dst_reg, __ D, ptrue, as_FloatRegister($src$$reg), __ D);
    __ sve_vector_narrow(dst_reg, result_size, dst_reg, __ D, scratch);
  %}
  ins_pipe(pipe_slow);
%}
2355 dnl
dnl vcvtDtoL: double to long keeps the D lane size, so a single fcvtzs is enough
VECTOR_CAST_X2X(D, L, fcvtzs, D, d2l)
2358 
// Cast from a double vector to a float vector: fcvt converts each D lane of
// src to single precision, then the D-spaced results are narrowed to S lanes
// with the help of one scratch vector register.
instruct vcvtDtoF(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(3 * SVE_COST);
  // Print destination and source like the other cast rules (the old text
  // printed the destination twice and never showed the source operand).
  format %{ "sve_vectorcast_d2f  $dst, $src\t# convert D to F vector" %}
  ins_encode %{
    __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
    __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
                         as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2374 
2375 dnl
2376 dnl
2377 // ------------------------------ Vector extract ---------------------------------
dnl VECTOR_EXTRACT_SXT($1,   $2,       $3,   $4,       $5      )
dnl VECTOR_EXTRACT_SXT(type, dst_oper, size, reg_type, sbfm_imm)
dnl Emits rule extract<type> for sub-word lanes: sve_extract moves the lane at
dnl the constant position idx into dst (pgtmp is a scratch predicate and the
dnl flags are killed), then sbfmw with bit range 0..sbfm_imm is applied to dst.
dnl With 7U / 15U that presumably sign-extends the low byte / halfword.
define(`VECTOR_EXTRACT_SXT', `
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0);
  match(Set dst (Extract$1 src idx));
  effect(TEMP pgtmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_extract $dst, $3, $pgtmp, $src, $idx\n\t"
            "sbfmw $dst, $dst, 0U, $5\t# extract from vector($1)" %}
  ins_encode %{
    __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pgtmp$$reg),
                   as_FloatRegister($src$$reg), (int)($idx$$constant));
    __ sbfmw(as_$4($dst$$reg), as_$4($dst$$reg), 0U, $5);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1 $2         $3 $4        $5
VECTOR_EXTRACT_SXT(B, iRegINoSp, B, Register, 7U)
VECTOR_EXTRACT_SXT(S, iRegINoSp, H, Register, 15U)
2397 
2398 dnl
dnl VECTOR_EXTRACT($1,   $2,       $3,   $4      )
dnl VECTOR_EXTRACT(type, dst_oper, size, reg_type)
dnl Emits rule extract<type> for int, long, float and double lanes: one
dnl sve_extract moves the lane at the constant position idx into dst, which is
dnl a general register for I/L and a floating-point register for F/D. pgtmp is
dnl a scratch predicate and the flags are killed.
define(`VECTOR_EXTRACT', `
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0);
  match(Set dst (Extract$1 src idx));
  effect(TEMP pgtmp, KILL cr);
  ins_cost(2 * SVE_COST);
  format %{ "sve_extract $dst, $3, $pgtmp, $src, $idx\t# extract from vector($1)" %}
  ins_encode %{
    __ sve_extract(as_$4($dst$$reg), __ $3, as_PRegister($pgtmp$$reg),
                   as_FloatRegister($src$$reg), (int)($idx$$constant));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl            $1 $2         $3 $4
VECTOR_EXTRACT(I, iRegINoSp, S, Register)
VECTOR_EXTRACT(L, iRegLNoSp, D, Register)
VECTOR_EXTRACT(F, vRegF,     S, FloatRegister)
VECTOR_EXTRACT(D, vRegD,     D, FloatRegister)
2418 
2419 // ------------------------------- VectorTest ----------------------------------
2420 
// VectorTest with the overflow predicate (alltrue) on a full-size vector mask.
// src2 is the all-true mask, so the flag-setting exclusive or of src1 and src2
// under ptrue yields an empty result exactly when src1 is all true; csetw then
// materializes the EQ condition in dst. ptmp only receives the discarded
// result of the exclusive or.
instruct vtest_alltrue(iRegINoSp dst, pRegGov src1, pRegGov src2, pReg ptmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set dst (VectorTest src1 src2));
  effect(TEMP ptmp, KILL cr);
  ins_cost(SVE_COST);
  // The continuation line is indented with a tab like every other two-line
  // format string in this file.
  format %{ "sve_eors $ptmp, $src1, $src2\t# $src2 is all true mask\n\t"
            "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %}
  ins_encode %{
    __ sve_eors(as_PRegister($ptmp$$reg), ptrue,
                as_PRegister($src1$$reg), as_PRegister($src2$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
2438 
// VectorTest with the ne predicate (anytrue) on a full-size vector mask:
// ptest of src1 under ptrue sets the flags and csetw materializes NE in dst.
instruct vtest_anytrue(iRegINoSp dst, pRegGov src1, pRegGov src2, rFlagsReg cr)
%{
  predicate(UseSVE > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) == MaxVectorSize &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set dst (VectorTest src1 src2));
  effect(KILL cr);
  ins_cost(SVE_COST);
  format %{ "sve_ptest $src1\n\t"
            "csetw $dst, NE\t# VectorTest (sve) - anytrue" %}
  ins_encode %{
    // The second mask operand is matched but plays no role in the encoding.
    const PRegister mask = as_PRegister($src1$$reg);
    __ sve_ptest(ptrue, mask);
    __ csetw(as_Register($dst$$reg), Assembler::NE);
  %}
  ins_pipe(pipe_slow);
%}
2456 dnl
2457 dnl
dnl VTEST_PARTIAL($1,      $2,   $3,   $4  )
dnl VTEST_PARTIAL(op_name, pred, inst, cond)
dnl Emits rule vtest_<op_name>_partial for VectorTest on masks whose vector is
dnl shorter than MaxVectorSize. whilelo first builds in ptmp a governing
dnl predicate sized by the lane count of src1; inst (a flag-setting predicate
dnl operation) then combines src1 and src2 under that predicate, overwriting
dnl ptmp, and csetw materializes condition cond in dst.
define(`VTEST_PARTIAL', `
instruct vtest_$1_partial`'(iRegINoSp dst, pRegGov src1, pRegGov src2, pRegGov ptmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::$2);
  match(Set dst (VectorTest src1 src2));
  effect(TEMP ptmp, KILL cr);
  ins_cost(SVE_COST);
  format %{ "vtest_$1_partial $dst, $src1, $src2\t# VectorTest partial (sve) - $1" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size,
                          Matcher::vector_length(this, $src1));
    __ $3(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
          as_PRegister($src1$$reg), as_PRegister($src2$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::$4);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
VTEST_PARTIAL(alltrue, overflow, sve_eors, EQ)
VTEST_PARTIAL(anytrue, ne,       sve_ands, NE)
2484 
2485 // ------------------------------ Vector insert ---------------------------------
2486 
// Insert a general-register value into one lane of a byte, short or int
// vector with at most 32 lanes. A lane counter starting at -16 is compared
// against (idx - 16) so that the compare can use an immediate; the resulting
// one-lane predicate selects where val is copied after src was moved to dst.
instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pgtmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "sve_index $dst, -16, 1\t# (B/H/S)\n\t"
            "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant lane_size = __ elemType_to_regVariant(elem_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const PRegister lane_sel = as_PRegister($pgtmp$$reg);

    __ sve_index(dst_reg, lane_size, -16, 1);
    __ sve_cmp(Assembler::EQ, lane_sel, lane_size, ptrue,
               dst_reg, (int)($idx$$constant) - 16);
    __ sve_orr(dst_reg, src_reg, src_reg);
    __ sve_cpy(dst_reg, lane_size, lane_sel, as_Register($val$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2511 
// Insert a float value into one lane of a float vector with at most 32 lanes.
// Same scheme as the integral small insert: a lane counter starting at -16 is
// compared against (idx - 16) to build a one-lane predicate for the copy.
instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pgtmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "sve_index $dst, S, -16, 1\n\t"
            "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const PRegister lane_sel = as_PRegister($pgtmp$$reg);

    __ sve_index(dst_reg, __ S, -16, 1);
    __ sve_cmp(Assembler::EQ, lane_sel, __ S, ptrue,
               dst_reg, (int)($idx$$constant) - 16);
    __ sve_orr(dst_reg, src_reg, src_reg);
    __ sve_cpy(dst_reg, __ S, lane_sel, as_FloatRegister($val$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2532 
// Insert a general-register value into one lane of a byte, short or int
// vector with more than 32 lanes. A lane counter starting at 0 (tmp1) is
// compared against a broadcast of idx held in dst; the resulting one-lane
// predicate selects where val is copied after src was moved to dst.
instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr);
  ins_cost(5 * SVE_COST);
  format %{ "sve_index $tmp1, 0, 1\t# (B/H/S)\n\t"
            "sve_dup $dst, $idx\t# (B/H/S)\n\t"
            "sve_cmpeq $pgtmp, $tmp1, $dst\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant lane_size = __ elemType_to_regVariant(elem_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const FloatRegister counter = as_FloatRegister($tmp1$$reg);
    const PRegister lane_sel = as_PRegister($pgtmp$$reg);

    __ sve_index(counter, lane_size, 0, 1);
    __ sve_dup(dst_reg, lane_size, (int)($idx$$constant));
    __ sve_cmp(Assembler::EQ, lane_sel, lane_size, ptrue, counter, dst_reg);
    __ sve_orr(dst_reg, src_reg, src_reg);
    __ sve_cpy(dst_reg, lane_size, lane_sel, as_Register($val$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2559 dnl
2560 dnl
dnl VECTOR_INSERT_D($1,   $2,       $3,   $4      )
dnl VECTOR_INSERT_D(type, val_oper, size, reg_type)
dnl Emits rule insert<type> for long and double vectors. It uses the immediate
dnl compare scheme of the small insert rules (lane counter starting at -16
dnl compared against idx - 16) but carries no lane-count check in its
dnl predicate. NOTE(review): presumably D-lane vectors never exceed 32 lanes;
dnl confirm against the maximum supported vector size.
define(`VECTOR_INSERT_D', `
instruct insert$1`'(vReg dst, vReg src, $2 val, immI idx, pRegGov pgtmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($1));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "sve_index $dst, $3, -16, 1\n\t"
            "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector ($1)" %}
  ins_encode %{
    __ sve_index(as_FloatRegister($dst$$reg), __ $3, -16, 1);
    __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ $3, ptrue,
               as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
    __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
    __ sve_cpy(as_FloatRegister($dst$$reg), __ $3, as_PRegister($pgtmp$$reg), as_$4($val$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl             $1 $2     $3 $4
VECTOR_INSERT_D(L, iRegL, D, Register)
VECTOR_INSERT_D(D, vRegD, D, FloatRegister)
2585 
// Insert a float value into one lane of a float vector with more than 32
// lanes. A lane counter starting at 0 (tmp1) is compared against a broadcast
// of idx held in dst; the resulting one-lane predicate selects where val is
// copied after src was moved to dst.
instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
            Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr);
  ins_cost(5 * SVE_COST);
  format %{ "sve_index $tmp1, S, 0, 1\n\t"
            "sve_dup $dst, S, $idx\n\t"
            "sve_cmpeq $pgtmp, $tmp1, $dst\n\t"
            "sve_orr $dst, $src, $src\n\t"
            "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const FloatRegister counter = as_FloatRegister($tmp1$$reg);
    const PRegister lane_sel = as_PRegister($pgtmp$$reg);

    __ sve_index(counter, __ S, 0, 1);
    __ sve_dup(dst_reg, __ S, (int)($idx$$constant));
    __ sve_cmp(Assembler::EQ, lane_sel, __ S, ptrue, counter, dst_reg);
    __ sve_orr(dst_reg, src_reg, src_reg);
    __ sve_cpy(dst_reg, __ S, lane_sel, as_FloatRegister($val$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2611 
2612 // ------------------------------ Vector shuffle -------------------------------
2613 
// VectorLoadShuffle: the source holds byte lanes. For a byte result they are
// copied (only when the registers differ); for any other element type they
// are extended from B to the lane size of the result.
instruct loadshuffle(vReg dst, vReg src) %{
  predicate(UseSVE > 0);
  match(Set dst (VectorLoadShuffle src));
  ins_cost(SVE_COST);
  format %{ "sve_loadshuffle $dst, $src\t# vector load shuffle (B/H/S/D)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);

    if (elem_bt != T_BYTE) {
      __ sve_vector_extend(dst_reg, __ elemType_to_regVariant(elem_bt), src_reg, __ B);
    } else if (dst_reg != src_reg) {
      // A plain register move, expressed as orr of the source with itself.
      __ sve_orr(dst_reg, src_reg, src_reg);
    }
  %}
  ins_pipe(pipe_slow);
%}
2633 
2634 // ------------------------------ Vector rearrange -------------------------------
2635 
// VectorRearrange: a single table lookup (tbl) of src driven by the shuffle
// vector, using the lane size of the source element type.
instruct rearrange(vReg dst, vReg src, vReg shuffle)
%{
  predicate(UseSVE > 0);
  match(Set dst (VectorRearrange src shuffle));
  ins_cost(SVE_COST);
  format %{ "sve_tbl $dst, $src, $shuffle\t# vector rearrange" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant lane_size = __ elemType_to_regVariant(elem_bt);
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister src_reg = as_FloatRegister($src$$reg);
    const FloatRegister shuffle_reg = as_FloatRegister($shuffle$$reg);
    __ sve_tbl(dst_reg, lane_size, src_reg, shuffle_reg);
  %}
  ins_pipe(pipe_slow);
%}
2650 
2651 // ------------------------------ Vector Load Gather ---------------------------------
2652 
// Full-size gather load of int or float lanes: one ld1w gather under ptrue
// from the base register of mem with the lane offsets held in idx.
instruct gatherI(vReg dst, indirect mem, vReg idx) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (LoadVectorGather mem idx));
  ins_cost(SVE_COST);
  format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (S)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister idx_reg = as_FloatRegister($idx$$reg);
    const Register base = as_Register($mem$$base);
    __ sve_ld1w_gather(dst_reg, ptrue, base, idx_reg);
  %}
  ins_pipe(pipe_slow);
%}
2667 
// Full-size gather load of long or double lanes. The lane offsets in idx are
// first unpacked (uunpklo) from S lanes to D lanes, then one ld1d gather under
// ptrue loads from the base register of mem.
//
// The unpacked offsets go into the scratch vector vtmp. Unpacking in place
// would overwrite the input operand idx without declaring that effect, so a
// later user of the same idx value (for example another gather sharing the
// offset vector) would read corrupted offsets.
instruct gatherL(vReg dst, indirect mem, vReg idx, vReg vtmp) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP vtmp);
  ins_cost(2 * SVE_COST);
  format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (D)" %}
  ins_encode %{
    // Widen the offsets into the scratch register; idx stays untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base),
                       as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2683 
2684 // ------------------------------ Vector Load Gather Partial-------------------------------
2685 
// Gather load of int or float lanes for vectors shorter than MaxVectorSize:
// whilelo builds a governing predicate sized by the lane count of this node
// in ptmp, and the ld1w gather runs under that predicate.
instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP ptmp, KILL cr);
  ins_cost(2 * SVE_COST + INSN_COST);
  format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (S)" %}
  ins_encode %{
    const PRegister active = as_PRegister($ptmp$$reg);
    __ sve_whilelo_zr_imm(active, __ S, Matcher::vector_length(this));
    __ sve_ld1w_gather(as_FloatRegister($dst$$reg), active,
                       as_Register($mem$$base), as_FloatRegister($idx$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2702 
// Gather load of long or double lanes for vectors shorter than MaxVectorSize:
// whilelo builds a governing predicate sized by the lane count of this node
// in ptmp, the offsets are unpacked from S lanes to D lanes, and the ld1d
// gather runs under that predicate.
//
// The unpacked offsets go into the scratch vector vtmp. Unpacking in place
// would overwrite the input operand idx without declaring that effect, so a
// later user of the same idx value would read corrupted offsets.
instruct gatherL_partial(vReg dst, indirect mem, vReg idx, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (D)" %}
  ins_encode %{
    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                          Matcher::vector_length(this));
    // Widen the offsets into the scratch register; idx stays untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg),
                       as_Register($mem$$base), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2721 
2722 // ------------------------------ Vector Load Gather Predicated -------------------------------
2723 
// Full-size masked gather load of int or float lanes: one ld1w gather
// governed directly by the mask pg.
instruct gatherI_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVector()->memory_size() == MaxVectorSize &&
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
  ins_cost(SVE_COST);
  format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (S)" %}
  ins_encode %{
    const FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    const FloatRegister idx_reg = as_FloatRegister($idx$$reg);
    const PRegister mask = as_PRegister($pg$$reg);
    __ sve_ld1w_gather(dst_reg, mask, as_Register($mem$$base), idx_reg);
  %}
  ins_pipe(pipe_slow);
%}
2738 
// Full-size masked gather load of long or double lanes: the offsets are
// unpacked from S lanes to D lanes, then one ld1d gather runs under the
// mask pg.
//
// The unpacked offsets go into the scratch vector vtmp. Unpacking in place
// would overwrite the input operand idx without declaring that effect, so a
// later user of the same idx value would read corrupted offsets.
instruct gatherL_masked(vReg dst, indirect mem, vReg idx, pRegGov pg, vReg vtmp) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVector()->memory_size() == MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
  effect(TEMP vtmp);
  ins_cost(2 * SVE_COST);
  format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (D)" %}
  ins_encode %{
    // Widen the offsets into the scratch register; idx stays untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg),
                       as_Register($mem$$base), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2754 
2755 // ------------------------------ Vector Load Gather Predicated Partial -------------------------------
2756 
// Masked gather load of int or float lanes for vectors shorter than
// MaxVectorSize: whilelo builds a predicate sized by the lane count of this
// node, it is intersected with the mask pg, and the ld1w gather runs under
// the combined predicate.
instruct gatherI_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVector()->memory_size() < MaxVectorSize &&
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
  effect(TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (S)" %}
  ins_encode %{
    const PRegister active = as_PRegister($ptmp$$reg);
    const PRegister mask = as_PRegister($pg$$reg);
    __ sve_whilelo_zr_imm(active, __ S, Matcher::vector_length(this));
    __ sve_and(active, active, mask, mask);
    __ sve_ld1w_gather(as_FloatRegister($dst$$reg), active,
                       as_Register($mem$$base), as_FloatRegister($idx$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2776 
// Masked gather load of long or double lanes for vectors shorter than
// MaxVectorSize: whilelo builds a predicate sized by the lane count of this
// node, it is intersected with the mask pg, the offsets are unpacked from S
// lanes to D lanes, and the ld1d gather runs under the combined predicate.
//
// The unpacked offsets go into the scratch vector vtmp. Unpacking in place
// would overwrite the input operand idx without declaring that effect, so a
// later user of the same idx value would read corrupted offsets.
instruct gatherL_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_LoadVector()->memory_size() < MaxVectorSize &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (D)" %}
  ins_encode %{
    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this));
    __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
               as_PRegister($pg$$reg), as_PRegister($pg$$reg));
    // Widen the offsets into the scratch register; idx stays untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg),
                       as_Register($mem$$base), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2796 
2797 // ------------------------------ Vector Store Scatter -------------------------------
2798 
// Unpredicated scatter store of 32-bit lanes (int/float) for vectors whose
// memory size equals MaxVectorSize; ptrue governs the st1w scatter.
instruct scatterI(indirect mem, vReg src, vReg idx) %{
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  ins_cost(SVE_COST);
  format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (S)" %}
  ins_encode %{
    const FloatRegister values  = as_FloatRegister($src$$reg);
    const FloatRegister offsets = as_FloatRegister($idx$$reg);
    const Register      base    = as_Register($mem$$base);
    __ sve_st1w_scatter(values, ptrue, base, offsets);
  %}
  ins_pipe(pipe_slow);
%}
2813 
// Unpredicated scatter store of 64-bit lanes (long/double) for vectors whose
// memory size equals MaxVectorSize; ptrue governs the st1d scatter.
//
// The indices are unpacked into D-sized lanes in the scratch vector vtmp.
// idx is a plain input operand of this rule, so it must not be overwritten:
// the register allocator is free to keep using its value afterwards.
instruct scatterL(indirect mem, vReg src, vReg idx, vReg vtmp) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP vtmp);
  ins_cost(2 * SVE_COST);
  format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (D)" %}
  ins_encode %{
    // Unpack the low-half indices into vtmp, leaving idx untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue,
                        as_Register($mem$$base), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2829 
2830 // ------------------------------ Vector Store Scatter Partial -------------------------------
2831 
// Scatter store of 32-bit lanes (int/float) for vectors narrower than
// MaxVectorSize. ptmp receives a predicate covering the lane count of src
// and governs the st1w scatter.
// cr is declared killed (presumably because whilelo updates the flags).
instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  ins_cost(2 * SVE_COST + INSN_COST);
  effect(TEMP ptmp, KILL cr);
  format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (S)" %}
  ins_encode %{
    const PRegister     lanes   = as_PRegister($ptmp$$reg);
    const FloatRegister values  = as_FloatRegister($src$$reg);
    const FloatRegister offsets = as_FloatRegister($idx$$reg);
    const Register      base    = as_Register($mem$$base);

    // lanes = first vector_length S-sized lanes of the stored vector
    __ sve_whilelo_zr_imm(lanes, __ S, Matcher::vector_length(this, $src));
    __ sve_st1w_scatter(values, lanes, base, offsets);
  %}
  ins_pipe(pipe_slow);
%}
2849 
// Scatter store of 64-bit lanes (long/double) for vectors narrower than
// MaxVectorSize. ptmp receives a predicate covering the lane count of src
// and governs the st1d scatter.
//
// The indices are unpacked into D-sized lanes in the scratch vector vtmp.
// idx is a plain input operand of this rule, so it must not be overwritten:
// the register allocator is free to keep using its value afterwards.
// cr is declared killed (presumably because whilelo updates the flags).
instruct scatterL_partial(indirect mem, vReg src, vReg idx, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST + INSN_COST);
  format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (D)" %}
  ins_encode %{
    // ptmp = first vector_length D-sized lanes of the stored vector
    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                          Matcher::vector_length(this, $src));
    // Unpack the low-half indices into vtmp, leaving idx untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg),
                        as_Register($mem$$base), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2868 
2869 // ------------------------------ Vector Store Scatter Predicated -------------------------------
2870 
// Predicated scatter store of 32-bit lanes (int/float) for vectors whose
// memory size equals MaxVectorSize; the incoming mask pg directly governs
// the st1w scatter.
instruct scatterI_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVector()->memory_size() == MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
  ins_cost(SVE_COST);
  // Wording aligned with the sibling rules, which all say "predicated".
  format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (S)" %}
  ins_encode %{
    __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2885 
// Predicated scatter store of 64-bit lanes (long/double) for vectors whose
// memory size equals MaxVectorSize; the incoming mask pg directly governs
// the st1d scatter.
//
// The indices are unpacked into D-sized lanes in the scratch vector vtmp.
// idx is a plain input operand of this rule, so it must not be overwritten:
// the register allocator is free to keep using its value afterwards.
instruct scatterL_masked(indirect mem, vReg src, vReg idx, pRegGov pg, vReg vtmp) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVector()->memory_size() == MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
  effect(TEMP vtmp);
  ins_cost(2 * SVE_COST);
  format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (D)" %}
  ins_encode %{
    // Unpack the low-half indices into vtmp, leaving idx untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
                        as_Register($mem$$base), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2901 
2902 // ------------------------------ Vector Store Scatter Predicated Partial -------------------------------
2903 
// Predicated scatter store of 32-bit lanes (int/float) for vectors narrower
// than MaxVectorSize. A lane-count predicate for src is built in ptmp,
// intersected with the incoming mask pg, and then governs the st1w scatter.
// cr is declared killed (presumably because whilelo updates the flags).
instruct scatterI_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
  predicate(UseSVE > 0 &&
            n->as_StoreVector()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  ins_cost(3 * SVE_COST);
  effect(TEMP ptmp, KILL cr);
  format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (S)" %}
  ins_encode %{
    const PRegister     lanes   = as_PRegister($ptmp$$reg);
    const PRegister     mask    = as_PRegister($pg$$reg);
    const FloatRegister values  = as_FloatRegister($src$$reg);
    const FloatRegister offsets = as_FloatRegister($idx$$reg);
    const Register      base    = as_Register($mem$$base);

    // lanes = first vector_length S-sized lanes of the stored vector
    __ sve_whilelo_zr_imm(lanes, __ S, Matcher::vector_length(this, $src));
    // lanes = lanes AND mask
    __ sve_and(lanes, lanes, mask, mask);
    __ sve_st1w_scatter(values, lanes, base, offsets);
  %}
  ins_pipe(pipe_slow);
%}
2923 
// Predicated scatter store of 64-bit lanes (long/double) for vectors narrower
// than MaxVectorSize. A lane-count predicate for src is built in ptmp,
// intersected with the incoming mask pg, and then governs the st1d scatter.
//
// The indices are unpacked into D-sized lanes in the scratch vector vtmp.
// idx is a plain input operand of this rule, so it must not be overwritten:
// the register allocator is free to keep using its value afterwards.
// cr is declared killed (presumably because whilelo updates the flags).
instruct scatterL_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->as_StoreVector()->memory_size() < MaxVectorSize &&
            (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
  effect(TEMP vtmp, TEMP ptmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (D)" %}
  ins_encode %{
    // ptmp = first vector_length D-sized lanes of the stored vector, narrowed by pg
    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                          Matcher::vector_length(this, $src));
    __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
               as_PRegister($pg$$reg), as_PRegister($pg$$reg));
    // Unpack the low-half indices into vtmp, leaving idx untouched.
    __ sve_uunpklo(as_FloatRegister($vtmp$$reg), __ D, as_FloatRegister($idx$$reg));
    __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg),
                        as_Register($mem$$base), as_FloatRegister($vtmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2944 
2945 // ------------------------------ Vector Load Const -------------------------------
2946 
// VectorLoadConst for byte vectors: fills dst with the sequence that starts
// at 0 and advances by 1 per lane (the iota indices named in the format).
instruct loadconB(vReg dst, immI0 src) %{
  match(Set dst (VectorLoadConst src));
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  ins_cost(SVE_COST);
  format %{ "sve_index $dst, 0, 1\t# generate iota indices" %}
  ins_encode %{
    const FloatRegister iota = as_FloatRegister($dst$$reg);
    __ sve_index(iota, __ B, 0, 1);
  %}
  ins_pipe(pipe_slow);
%}
2958 
// Intrinsics for String.indexOf(char)
2960 
2961 dnl
dnl STRING_INDEXOF_CHAR($1,       $2,            $3 )
dnl STRING_INDEXOF_CHAR(encoding, encoding_name, isL)
dnl Emits one rule matching StrIndexOfChar nodes whose encoding equals
dnl StrIntrinsicNode::$1. $2 only appears in the format text and $3 is
dnl forwarded as the last argument of string_indexof_char_sve.
define(`STRING_INDEXOF_CHAR', `
instruct string$1_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                                  iRegI_R0 result, vReg ztmp1, vReg ztmp2,
                                  pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
%{
  predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::$1));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);

  format %{ "String$2 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}

  ins_encode %{
    const FloatRegister vec_tmp1 = as_FloatRegister($ztmp1$$reg);
    const FloatRegister vec_tmp2 = as_FloatRegister($ztmp2$$reg);
    const PRegister     gov_tmp  = as_PRegister($pgtmp$$reg);
    const PRegister     pred_tmp = as_PRegister($ptmp$$reg);
    __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                               vec_tmp1, vec_tmp2, gov_tmp, pred_tmp, $3 /* isL */);
  %}
  ins_pipe(pipe_class_memory);
%}')dnl
dnl                 $1 $2      $3
STRING_INDEXOF_CHAR(L, Latin1, true)
STRING_INDEXOF_CHAR(U, UTF16,  false)
2983 
2984 // ---------------------------- Vector mask reductions ---------------------------
// VectorMaskTrueCount for masks whose vector length in bytes equals
// MaxVectorSize: a single cntp governed by ptrue, with the element size
// derived from the element type of src.
instruct vmask_truecount(iRegINoSp dst, pReg src) %{
  match(Set dst (VectorMaskTrueCount src));
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
  ins_cost(SVE_COST);
  format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister mask = as_PRegister($src$$reg);
    __ sve_cntp($dst$$Register, variant, ptrue, mask);
  %}
  ins_pipe(pipe_slow);
%}
2998 
// VectorMaskFirstTrue for masks whose vector length in bytes equals
// MaxVectorSize. brkb writes a derived predicate into ptmp and cntp counts
// its set lanes (presumably brkb keeps the lanes that precede the first set
// lane of src, so the count is the first-true position).
instruct vmask_firsttrue(iRegINoSp dst, pReg src, pReg ptmp) %{
  match(Set dst (VectorMaskFirstTrue src));
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
  ins_cost(2 * SVE_COST);
  effect(TEMP ptmp);
  format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister mask   = as_PRegister($src$$reg);
    const PRegister before = as_PRegister($ptmp$$reg);
    __ sve_brkb(before, ptrue, mask, false);
    __ sve_cntp($dst$$Register, variant, ptrue, before);
  %}
  ins_pipe(pipe_slow);
%}
3014 
// VectorMaskLastTrue for masks whose vector length in bytes equals
// MaxVectorSize. The work is delegated to the sve_vmask_lasttrue helper,
// which is handed ptmp as a scratch predicate.
instruct vmask_lasttrue(iRegINoSp dst, pReg src, pReg ptmp) %{
  match(Set dst (VectorMaskLastTrue src));
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
  ins_cost(3 * SVE_COST);
  effect(TEMP ptmp);
  format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const PRegister mask    = as_PRegister($src$$reg);
    const PRegister scratch = as_PRegister($ptmp$$reg);
    __ sve_vmask_lasttrue($dst$$Register, elem_bt, mask, scratch);
  %}
  ins_pipe(pipe_slow);
%}
3028 
// VectorMaskTrueCount for masks whose vector length in bytes is below
// MaxVectorSize. pgtmp receives a predicate covering the lane count of src
// and governs cntp in place of ptrue.
// cr is declared killed (presumably because whilelo updates the flags).
instruct vmask_truecount_partial(iRegINoSp dst, pReg src, pRegGov pgtmp, rFlagsReg cr) %{
  match(Set dst (VectorMaskTrueCount src));
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  ins_cost(2 * SVE_COST);
  effect(TEMP pgtmp, KILL cr);
  format %{ "vmask_truecount_partial $dst, $src\t# vector mask truecount partial (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister lanes = as_PRegister($pgtmp$$reg);
    const PRegister mask  = as_PRegister($src$$reg);
    __ sve_whilelo_zr_imm(lanes, variant, Matcher::vector_length(this, $src));
    __ sve_cntp($dst$$Register, variant, lanes, mask);
  %}
  ins_pipe(pipe_slow);
%}
3044 
// VectorMaskFirstTrue for masks whose vector length in bytes is below
// MaxVectorSize. pgtmp receives a predicate covering the lane count of src;
// it governs both the brkb that fills ptmp and the final cntp.
// cr is declared killed (presumably because whilelo updates the flags).
instruct vmask_firsttrue_partial(iRegINoSp dst, pReg src, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{
  match(Set dst (VectorMaskFirstTrue src));
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  ins_cost(3 * SVE_COST);
  effect(TEMP pgtmp, TEMP ptmp, KILL cr);
  format %{ "vmask_firsttrue_partial $dst, $src\t# vector mask firsttrue partial (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister lanes  = as_PRegister($pgtmp$$reg);
    const PRegister before = as_PRegister($ptmp$$reg);
    const PRegister mask   = as_PRegister($src$$reg);
    __ sve_whilelo_zr_imm(lanes, variant, Matcher::vector_length(this, $src));
    __ sve_brkb(before, lanes, mask, false);
    __ sve_cntp($dst$$Register, variant, lanes, before);
  %}
  ins_pipe(pipe_slow);
%}
3062 
// VectorMaskLastTrue for masks whose vector length in bytes is below
// MaxVectorSize. ptmp first receives a predicate covering the lane count of
// src, is then ANDed with src, and is finally passed to sve_vmask_lasttrue
// as both the mask to inspect and the scratch predicate.
// cr is declared killed (presumably because whilelo updates the flags).
instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr) %{
  match(Set dst (VectorMaskLastTrue src));
  predicate(UseSVE > 0 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  ins_cost(5 * SVE_COST);
  effect(TEMP ptmp, KILL cr);
  format %{ "vmask_lasttrue_partial $dst, $src\t# vector mask lasttrue partial (sve)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(elem_bt);
    const PRegister work = as_PRegister($ptmp$$reg);
    const PRegister mask = as_PRegister($src$$reg);
    __ sve_whilelo_zr_imm(work, variant, Matcher::vector_length(this, $src));
    __ sve_and(work, ptrue, work, mask);
    __ sve_vmask_lasttrue($dst$$Register, elem_bt, work, work);
  %}
  ins_pipe(pipe_slow);
%}dnl