1 //
   2 // Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2020, 2021, Arm Limited. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
  27 
  28 // AArch64 SVE Architecture Description File
  29 
  30 
  31 // 4 bit signed offset -- for predicated load/store
  32 
  33 operand vmemA_immIOffset4()
  34 %{
  35   // (esize / msize) = 1
  36   predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4,
  37             Matcher::scalable_vector_reg_size(T_BYTE)));
  38   match(ConI);
  39 
  40   op_cost(0);
  41   format %{ %}
  42   interface(CONST_INTER);
  43 %}
  44 
  45 operand vmemA_immLOffset4()
  46 %{
  47   // (esize / msize) = 1
  48   predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4,
  49             Matcher::scalable_vector_reg_size(T_BYTE)));
  50   match(ConL);
  51 
  52   op_cost(0);
  53   format %{ %}
  54   interface(CONST_INTER);
  55 %}
  56 
  57 operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off)
  58 %{
  59   constraint(ALLOC_IN_RC(ptr_reg));
  60   match(AddP reg off);
  61   op_cost(0);
  62   format %{ "[$reg, $off]" %}
  63   interface(MEMORY_INTER) %{
  64     base($reg);
  65     index(0xffffffff);
  66     scale(0x0);
  67     disp($off);
  68   %}
  69 %}
  70 
  71 operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off)
  72 %{
  73   constraint(ALLOC_IN_RC(ptr_reg));
  74   match(AddP reg off);
  75   op_cost(0);
  76   format %{ "[$reg, $off]" %}
  77   interface(MEMORY_INTER) %{
  78     base($reg);
  79     index(0xffffffff);
  80     scale(0x0);
  81     disp($off);
  82   %}
  83 %}
  84 
  85 // The indOff of vmemA is valid only when the vector element (load to/store from)
  86 // size equals to memory element (load from/store to) size.
  87 opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
  88 
  89 source_hpp %{
  90   bool op_sve_supported(int opcode, int vlen, BasicType bt);
  91 %}
  92 
  93 source %{
  94 
  95   typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
  96                                                              PRegister Pg, const Address &adr);
  97 
  98   // Predicated load/store, with optional ptrue to all elements of given predicate register.
  99   static void loadStoreA_predicated(C2_MacroAssembler masm, bool is_store, FloatRegister reg,
 100                                     PRegister pg, BasicType mem_elem_bt, BasicType vector_elem_bt,
 101                                     int opcode, Register base, int index, int size, int disp) {
 102     sve_mem_insn_predicate insn;
 103     int mesize = type2aelembytes(mem_elem_bt);
 104     if (index == -1) {
 105       assert(size == 0, "unsupported address mode: scale size = %d", size);
 106       switch(mesize) {
 107       case 1:
 108         insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
 109         break;
 110       case 2:
 111         insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
 112         break;
 113       case 4:
 114         insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
 115         break;
 116       case 8:
 117         insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
 118         break;
 119       default:
 120         assert(false, "unsupported");
 121         ShouldNotReachHere();
 122       }
 123       int imm4 = disp / mesize / Matcher::scalable_vector_reg_size(vector_elem_bt);
 124       (masm.*insn)(reg, Assembler::elemType_to_regVariant(vector_elem_bt), pg, Address(base, imm4));
 125     } else {
 126       assert(false, "unimplemented");
 127       ShouldNotReachHere();
 128     }
 129   }
 130 
 131   bool op_sve_supported(int opcode, int vlen, BasicType bt) {
 132     int length_in_bytes = vlen * type2aelembytes(bt);
 133     switch (opcode) {
 134       case Op_MulAddVS2VI:
 135       // No multiply reduction instructions
 136       case Op_MulReductionVD:
 137       case Op_MulReductionVF:
 138       case Op_MulReductionVI:
 139       case Op_MulReductionVL:
 140       // Others
 141       case Op_ExtractC:
 142       case Op_ExtractUB:
 143         return false;
 144       // Vector API specific
 145       case Op_VectorLoadShuffle:
 146       case Op_VectorRearrange:
 147         if (vlen < 4 || length_in_bytes > MaxVectorSize) {
 148           return false;
 149         } else {
 150           return true;
 151         }
 152       case Op_LoadVector:
 153       case Op_StoreVector:
 154         return Matcher::vector_size_supported(bt, vlen);
 155       default:
 156         break;
 157     }
 158     // By default, we only support vector operations with no less than 8 bytes and 2 elements.
 159     return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2;
 160   }
 161 %}
 162 
 163 definitions %{
 164   int_def SVE_COST             (200, 200);
 165 %}
 166 
 167 
 168 // All SVE instructions
 169 
 170 // vector load/store
 171 
 172 // Unpredicated vector load/store
 173 instruct loadV(vReg dst, vmemA mem) %{
 174   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16 &&
 175             n->as_LoadVector()->memory_size() == MaxVectorSize);
 176   match(Set dst (LoadVector mem));
 177   ins_cost(4 * SVE_COST);
 178   format %{ "sve_ldr $dst, $mem\t# vector (sve)" %}
 179   ins_encode %{
 180     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 181     BasicType bt = Matcher::vector_element_basic_type(this);
 182     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
 183                           bt, bt, $mem->opcode(),
 184                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 185   %}
 186   ins_pipe(pipe_slow);
 187 %}
 188 
 189 instruct storeV(vReg src, vmemA mem) %{
 190   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16 &&
 191             n->as_StoreVector()->memory_size() == MaxVectorSize);
 192   match(Set mem (StoreVector mem src));
 193   ins_cost(4 * SVE_COST);
 194   format %{ "sve_str $mem, $src\t# vector (sve)" %}
 195   ins_encode %{
 196     FloatRegister src_reg = as_FloatRegister($src$$reg);
 197     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 198     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
 199                           bt, bt, $mem->opcode(),
 200                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 201   %}
 202   ins_pipe(pipe_slow);
 203 %}
 204 
 205 // Load Vector (16 bits)
 206 instruct loadV2_vreg(vReg dst, vmem2 mem)
 207 %{
 208   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() == 2);
 209   match(Set dst (LoadVector mem));
 210   ins_cost(4 * INSN_COST);
 211   format %{ "ldrh   $dst,$mem\t# vector (16 bits)" %}
 212   ins_encode( aarch64_enc_ldrvH(dst, mem) );
 213   ins_pipe(vload_reg_mem64);
 214 %}
 215 
 216 // Store Vector (16 bits)
 217 instruct storeV2_vreg(vReg src, vmem2 mem)
 218 %{
 219   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 2);
 220   match(Set mem (StoreVector mem src));
 221   ins_cost(4 * INSN_COST);
 222   format %{ "strh   $mem,$src\t# vector (16 bits)" %}
 223   ins_encode( aarch64_enc_strvH(src, mem) );
 224   ins_pipe(vstore_reg_mem64);
 225 %}
 226 
 227 // Load Vector (32 bits)
 228 instruct loadV4_vreg(vReg dst, vmem4 mem)
 229 %{
 230   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() == 4);
 231   match(Set dst (LoadVector mem));
 232   ins_cost(4 * INSN_COST);
 233   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
 234   ins_encode( aarch64_enc_ldrvS(dst, mem) );
 235   ins_pipe(vload_reg_mem64);
 236 %}
 237 
 238 // Store Vector (32 bits)
 239 instruct storeV4_vreg(vReg src, vmem4 mem)
 240 %{
 241   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 4);
 242   match(Set mem (StoreVector mem src));
 243   ins_cost(4 * INSN_COST);
 244   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
 245   ins_encode( aarch64_enc_strvS(src, mem) );
 246   ins_pipe(vstore_reg_mem64);
 247 %}
 248 
 249 // Load Vector (64 bits)
 250 instruct loadV8_vreg(vReg dst, vmem8 mem)
 251 %{
 252   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() == 8);
 253   match(Set dst (LoadVector mem));
 254   ins_cost(4 * INSN_COST);
 255   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
 256   ins_encode( aarch64_enc_ldrvD(dst, mem) );
 257   ins_pipe(vload_reg_mem64);
 258 %}
 259 
 260 // Store Vector (64 bits)
 261 instruct storeV8_vreg(vReg src, vmem8 mem)
 262 %{
 263   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 8);
 264   match(Set mem (StoreVector mem src));
 265   ins_cost(4 * INSN_COST);
 266   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
 267   ins_encode( aarch64_enc_strvD(src, mem) );
 268   ins_pipe(vstore_reg_mem64);
 269 %}
 270 
 271 // Load Vector (128 bits)
 272 instruct loadV16_vreg(vReg dst, vmem16 mem)
 273 %{
 274   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() == 16);
 275   match(Set dst (LoadVector mem));
 276   ins_cost(4 * INSN_COST);
 277   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
 278   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
 279   ins_pipe(vload_reg_mem128);
 280 %}
 281 
 282 // Store Vector (128 bits)
 283 instruct storeV16_vreg(vReg src, vmem16 mem)
 284 %{
 285   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() == 16);
 286   match(Set mem (StoreVector mem src));
 287   ins_cost(4 * INSN_COST);
 288   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
 289   ins_encode( aarch64_enc_strvQ(src, mem) );
 290   ins_pipe(vstore_reg_mem128);
 291 %}
 292 
 293 // Predicated vector load/store, based on the vector length of the node.
 294 // Only load/store values in the range of the memory_size. This is needed
 295 // when the memory_size is lower than the hardware supported max vector size.
 296 // And this might happen for Vector API mask vector load/store.
 297 instruct loadV_partial(vReg dst, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{
 298   predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 &&
 299             n->as_LoadVector()->memory_size() < MaxVectorSize);
 300   match(Set dst (LoadVector mem));
 301   effect(TEMP pTmp, KILL cr);
 302   ins_cost(6 * SVE_COST);
 303   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
 304             "sve_ldr $dst, $pTmp, $mem\t# load vector predicated" %}
 305   ins_encode %{
 306     BasicType bt = Matcher::vector_element_basic_type(this);
 307     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt),
 308                           Matcher::vector_length(this));
 309     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
 310     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg,
 311                           as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(),
 312                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 313   %}
 314   ins_pipe(pipe_slow);
 315 %}
 316 
 317 instruct storeV_partial(vReg src, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{
 318   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 &&
 319             n->as_StoreVector()->memory_size() < MaxVectorSize);
 320   match(Set mem (StoreVector mem src));
 321   effect(TEMP pTmp, KILL cr);
 322   ins_cost(5 * SVE_COST);
 323   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
 324             "sve_str $src, $pTmp, $mem\t# store vector predicated" %}
 325   ins_encode %{
 326     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 327     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt),
 328                           Matcher::vector_length(this, $src));
 329     FloatRegister src_reg = as_FloatRegister($src$$reg);
 330     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg,
 331                           as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(),
 332                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
 333   %}
 334   ins_pipe(pipe_slow);
 335 %}
 336 
 337 // vector reinterpret
 338 
 339 instruct reinterpret(vReg dst) %{
 340   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() ==
 341                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src == dst
 342   match(Set dst (VectorReinterpret dst));
 343   ins_cost(0);
 344   format %{ "# reinterpret $dst\t# do nothing" %}
 345   ins_encode %{
 346     // empty
 347   %}
 348   ins_pipe(pipe_class_empty);
 349 %}
 350 
 351 instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{
 352   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() !=
 353                           n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src != dst
 354   match(Set dst (VectorReinterpret src));
 355   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
 356   ins_cost(3 * SVE_COST);
 357   format %{ "reinterpretResize $dst, $src\t# vector (sve)" %}
 358   ins_encode %{
 359     uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
 360     uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
 361     uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ?
 362                                   length_in_bytes_src : length_in_bytes_dst;
 363     assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
 364            "invalid vector length");
 365     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ B, length_in_bytes_resize);
 366     __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0);
 367     __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pTmp$$reg),
 368                as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg));
 369   %}
 370   ins_pipe(pipe_slow);
 371 %}
 372 
 373 // vector abs
 374 
 375 instruct vabsB(vReg dst, vReg src) %{
 376   predicate(UseSVE > 0 &&
 377             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 378   match(Set dst (AbsVB src));
 379   ins_cost(SVE_COST);
 380   format %{ "sve_abs $dst, $src\t# vector (sve) (B)" %}
 381   ins_encode %{
 382     __ sve_abs(as_FloatRegister($dst$$reg), __ B,
 383          ptrue, as_FloatRegister($src$$reg));
 384   %}
 385   ins_pipe(pipe_slow);
 386 %}
 387 
 388 instruct vabsS(vReg dst, vReg src) %{
 389   predicate(UseSVE > 0 &&
 390             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 391   match(Set dst (AbsVS src));
 392   ins_cost(SVE_COST);
 393   format %{ "sve_abs $dst, $src\t# vector (sve) (H)" %}
 394   ins_encode %{
 395     __ sve_abs(as_FloatRegister($dst$$reg), __ H,
 396          ptrue, as_FloatRegister($src$$reg));
 397   %}
 398   ins_pipe(pipe_slow);
 399 %}
 400 
 401 instruct vabsI(vReg dst, vReg src) %{
 402   predicate(UseSVE > 0 &&
 403             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 404   match(Set dst (AbsVI src));
 405   ins_cost(SVE_COST);
 406   format %{ "sve_abs $dst, $src\t# vector (sve) (S)" %}
 407   ins_encode %{
 408     __ sve_abs(as_FloatRegister($dst$$reg), __ S,
 409          ptrue, as_FloatRegister($src$$reg));
 410   %}
 411   ins_pipe(pipe_slow);
 412 %}
 413 
 414 instruct vabsL(vReg dst, vReg src) %{
 415   predicate(UseSVE > 0 &&
 416             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 417   match(Set dst (AbsVL src));
 418   ins_cost(SVE_COST);
 419   format %{ "sve_abs $dst, $src\t# vector (sve) (D)" %}
 420   ins_encode %{
 421     __ sve_abs(as_FloatRegister($dst$$reg), __ D,
 422          ptrue, as_FloatRegister($src$$reg));
 423   %}
 424   ins_pipe(pipe_slow);
 425 %}
 426 
 427 instruct vabsF(vReg dst, vReg src) %{
 428   predicate(UseSVE > 0 &&
 429             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 430   match(Set dst (AbsVF src));
 431   ins_cost(SVE_COST);
 432   format %{ "sve_fabs $dst, $src\t# vector (sve) (S)" %}
 433   ins_encode %{
 434     __ sve_fabs(as_FloatRegister($dst$$reg), __ S,
 435          ptrue, as_FloatRegister($src$$reg));
 436   %}
 437   ins_pipe(pipe_slow);
 438 %}
 439 
 440 instruct vabsD(vReg dst, vReg src) %{
 441   predicate(UseSVE > 0 &&
 442             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
 443   match(Set dst (AbsVD src));
 444   ins_cost(SVE_COST);
 445   format %{ "sve_fabs $dst, $src\t# vector (sve) (D)" %}
 446   ins_encode %{
 447     __ sve_fabs(as_FloatRegister($dst$$reg), __ D,
 448          ptrue, as_FloatRegister($src$$reg));
 449   %}
 450   ins_pipe(pipe_slow);
 451 %}
 452 
 453 // vector add
 454 
 455 instruct vaddB(vReg dst, vReg src1, vReg src2) %{
 456   predicate(UseSVE > 0);
 457   match(Set dst (AddVB src1 src2));
 458   ins_cost(SVE_COST);
 459   format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %}
 460   ins_encode %{
 461     __ sve_add(as_FloatRegister($dst$$reg), __ B,
 462          as_FloatRegister($src1$$reg),
 463          as_FloatRegister($src2$$reg));
 464   %}
 465   ins_pipe(pipe_slow);
 466 %}
 467 
 468 instruct vaddS(vReg dst, vReg src1, vReg src2) %{
 469   predicate(UseSVE > 0);
 470   match(Set dst (AddVS src1 src2));
 471   ins_cost(SVE_COST);
 472   format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %}
 473   ins_encode %{
 474     __ sve_add(as_FloatRegister($dst$$reg), __ H,
 475          as_FloatRegister($src1$$reg),
 476          as_FloatRegister($src2$$reg));
 477   %}
 478   ins_pipe(pipe_slow);
 479 %}
 480 
 481 instruct vaddI(vReg dst, vReg src1, vReg src2) %{
 482   predicate(UseSVE > 0);
 483   match(Set dst (AddVI src1 src2));
 484   ins_cost(SVE_COST);
 485   format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
 486   ins_encode %{
 487     __ sve_add(as_FloatRegister($dst$$reg), __ S,
 488          as_FloatRegister($src1$$reg),
 489          as_FloatRegister($src2$$reg));
 490   %}
 491   ins_pipe(pipe_slow);
 492 %}
 493 
 494 instruct vaddL(vReg dst, vReg src1, vReg src2) %{
 495   predicate(UseSVE > 0);
 496   match(Set dst (AddVL src1 src2));
 497   ins_cost(SVE_COST);
 498   format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %}
 499   ins_encode %{
 500     __ sve_add(as_FloatRegister($dst$$reg), __ D,
 501          as_FloatRegister($src1$$reg),
 502          as_FloatRegister($src2$$reg));
 503   %}
 504   ins_pipe(pipe_slow);
 505 %}
 506 
 507 instruct vaddF(vReg dst, vReg src1, vReg src2) %{
 508   predicate(UseSVE > 0);
 509   match(Set dst (AddVF src1 src2));
 510   ins_cost(SVE_COST);
 511   format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %}
 512   ins_encode %{
 513     __ sve_fadd(as_FloatRegister($dst$$reg), __ S,
 514          as_FloatRegister($src1$$reg),
 515          as_FloatRegister($src2$$reg));
 516   %}
 517   ins_pipe(pipe_slow);
 518 %}
 519 
 520 instruct vaddD(vReg dst, vReg src1, vReg src2) %{
 521   predicate(UseSVE > 0);
 522   match(Set dst (AddVD src1 src2));
 523   ins_cost(SVE_COST);
 524   format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %}
 525   ins_encode %{
 526     __ sve_fadd(as_FloatRegister($dst$$reg), __ D,
 527          as_FloatRegister($src1$$reg),
 528          as_FloatRegister($src2$$reg));
 529   %}
 530   ins_pipe(pipe_slow);
 531 %}
 532 
 533 // vector and
 534 
 535 instruct vand(vReg dst, vReg src1, vReg src2) %{
 536   predicate(UseSVE > 0);
 537   match(Set dst (AndV src1 src2));
 538   ins_cost(SVE_COST);
 539   format %{ "sve_and  $dst, $src1, $src2\t# vector (sve)" %}
 540   ins_encode %{
 541     __ sve_and(as_FloatRegister($dst$$reg),
 542          as_FloatRegister($src1$$reg),
 543          as_FloatRegister($src2$$reg));
 544   %}
 545   ins_pipe(pipe_slow);
 546 %}
 547 
 548 // vector or
 549 
 550 instruct vor(vReg dst, vReg src1, vReg src2) %{
 551   predicate(UseSVE > 0);
 552   match(Set dst (OrV src1 src2));
 553   ins_cost(SVE_COST);
 554   format %{ "sve_orr  $dst, $src1, $src2\t# vector (sve)" %}
 555   ins_encode %{
 556     __ sve_orr(as_FloatRegister($dst$$reg),
 557          as_FloatRegister($src1$$reg),
 558          as_FloatRegister($src2$$reg));
 559   %}
 560   ins_pipe(pipe_slow);
 561 %}
 562 
 563 // vector xor
 564 
 565 instruct vxor(vReg dst, vReg src1, vReg src2) %{
 566   predicate(UseSVE > 0);
 567   match(Set dst (XorV src1 src2));
 568   ins_cost(SVE_COST);
 569   format %{ "sve_eor  $dst, $src1, $src2\t# vector (sve)" %}
 570   ins_encode %{
 571     __ sve_eor(as_FloatRegister($dst$$reg),
 572          as_FloatRegister($src1$$reg),
 573          as_FloatRegister($src2$$reg));
 574   %}
 575   ins_pipe(pipe_slow);
 576 %}
 577 
 578 // vector not
 579 
 580 instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{
 581   predicate(UseSVE > 0);
 582   match(Set dst (XorV src (ReplicateB m1)));
 583   match(Set dst (XorV src (ReplicateS m1)));
 584   match(Set dst (XorV src (ReplicateI m1)));
 585   ins_cost(SVE_COST);
 586   format %{ "sve_not $dst, $src\t# vector (sve) B/H/S" %}
 587   ins_encode %{
 588     __ sve_not(as_FloatRegister($dst$$reg), __ D,
 589                ptrue, as_FloatRegister($src$$reg));
 590   %}
 591   ins_pipe(pipe_slow);
 592 %}
 593 
 594 instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
 595   predicate(UseSVE > 0);
 596   match(Set dst (XorV src (ReplicateL m1)));
 597   ins_cost(SVE_COST);
 598   format %{ "sve_not $dst, $src\t# vector (sve) D" %}
 599   ins_encode %{
 600     __ sve_not(as_FloatRegister($dst$$reg), __ D,
 601                ptrue, as_FloatRegister($src$$reg));
 602   %}
 603   ins_pipe(pipe_slow);
 604 %}
 605 
 606 
 607 // vector and_not
 608 
 609 instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
 610   predicate(UseSVE > 0);
 611   match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
 612   match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
 613   match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
 614   ins_cost(SVE_COST);
 615   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) B/H/S" %}
 616   ins_encode %{
 617     __ sve_bic(as_FloatRegister($dst$$reg),
 618                as_FloatRegister($src1$$reg),
 619                as_FloatRegister($src2$$reg));
 620   %}
 621   ins_pipe(pipe_slow);
 622 %}
 623 
 624 instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{
 625   predicate(UseSVE > 0);
 626   match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));
 627   ins_cost(SVE_COST);
 628   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) D" %}
 629   ins_encode %{
 630     __ sve_bic(as_FloatRegister($dst$$reg),
 631                as_FloatRegister($src1$$reg),
 632                as_FloatRegister($src2$$reg));
 633   %}
 634   ins_pipe(pipe_slow);
 635 %}
 636 
 637 
 638 // vector float div
 639 
 640 instruct vdivF(vReg dst_src1, vReg src2) %{
 641   predicate(UseSVE > 0);
 642   match(Set dst_src1 (DivVF dst_src1 src2));
 643   ins_cost(SVE_COST);
 644   format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %}
 645   ins_encode %{
 646     __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S,
 647          ptrue, as_FloatRegister($src2$$reg));
 648   %}
 649   ins_pipe(pipe_slow);
 650 %}
 651 
 652 instruct vdivD(vReg dst_src1, vReg src2) %{
 653   predicate(UseSVE > 0);
 654   match(Set dst_src1 (DivVD dst_src1 src2));
 655   ins_cost(SVE_COST);
 656   format %{ "sve_fdiv  $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %}
 657   ins_encode %{
 658     __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D,
 659          ptrue, as_FloatRegister($src2$$reg));
 660   %}
 661   ins_pipe(pipe_slow);
 662 %}
 663 
 664 // vector min/max
 665 
 666 instruct vmin(vReg dst_src1, vReg src2) %{
 667   predicate(UseSVE > 0);
 668   match(Set dst_src1 (MinV dst_src1 src2));
 669   ins_cost(SVE_COST);
 670   format %{ "sve_min $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
 671   ins_encode %{
 672     BasicType bt = Matcher::vector_element_basic_type(this);
 673     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 674     if (is_floating_point_type(bt)) {
 675       __ sve_fmin(as_FloatRegister($dst_src1$$reg), size,
 676                   ptrue, as_FloatRegister($src2$$reg));
 677     } else {
 678       assert(is_integral_type(bt), "Unsupported type");
 679       __ sve_smin(as_FloatRegister($dst_src1$$reg), size,
 680                   ptrue, as_FloatRegister($src2$$reg));
 681     }
 682   %}
 683   ins_pipe(pipe_slow);
 684 %}
 685 
 686 instruct vmax(vReg dst_src1, vReg src2) %{
 687   predicate(UseSVE > 0);
 688   match(Set dst_src1 (MaxV dst_src1 src2));
 689   ins_cost(SVE_COST);
 690   format %{ "sve_max $dst_src1, $dst_src1, $src2\t # vector (sve)" %}
 691   ins_encode %{
 692     BasicType bt = Matcher::vector_element_basic_type(this);
 693     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
 694     if (is_floating_point_type(bt)) {
 695       __ sve_fmax(as_FloatRegister($dst_src1$$reg), size,
 696                   ptrue, as_FloatRegister($src2$$reg));
 697     } else {
 698       assert(is_integral_type(bt), "Unsupported type");
 699       __ sve_smax(as_FloatRegister($dst_src1$$reg), size,
 700                   ptrue, as_FloatRegister($src2$$reg));
 701     }
 702   %}
 703   ins_pipe(pipe_slow);
 704 %}
 705 
 706 // vector fmla
 707 
 708 // dst_src1 = dst_src1 + src2 * src3
 709 instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
 710   predicate(UseFMA && UseSVE > 0);
 711   match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
 712   ins_cost(SVE_COST);
 713   format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
 714   ins_encode %{
 715     __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S,
 716          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 717   %}
 718   ins_pipe(pipe_slow);
 719 %}
 720 
 721 // dst_src1 = dst_src1 + src2 * src3
 722 instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
 723   predicate(UseFMA && UseSVE > 0);
 724   match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
 725   ins_cost(SVE_COST);
 726   format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
 727   ins_encode %{
 728     __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D,
 729          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 730   %}
 731   ins_pipe(pipe_slow);
 732 %}
 733 
 734 // vector fmls
 735 
 736 // dst_src1 = dst_src1 + -src2 * src3
 737 // dst_src1 = dst_src1 + src2 * -src3
 738 instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
 739   predicate(UseFMA && UseSVE > 0);
 740   match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
 741   match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
 742   ins_cost(SVE_COST);
 743   format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
 744   ins_encode %{
 745     __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S,
 746          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 747   %}
 748   ins_pipe(pipe_slow);
 749 %}
 750 
 751 // dst_src1 = dst_src1 + -src2 * src3
 752 // dst_src1 = dst_src1 + src2 * -src3
 753 instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
 754   predicate(UseFMA && UseSVE > 0);
 755   match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
 756   match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
 757   ins_cost(SVE_COST);
 758   format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
 759   ins_encode %{
 760     __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D,
 761          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 762   %}
 763   ins_pipe(pipe_slow);
 764 %}
 765 
 766 // vector fnmla
 767 
 768 // dst_src1 = -dst_src1 + -src2 * src3
 769 // dst_src1 = -dst_src1 + src2 * -src3
 770 instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
 771   predicate(UseFMA && UseSVE > 0);
 772   match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
 773   match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
 774   ins_cost(SVE_COST);
 775   format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
 776   ins_encode %{
 777     __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S,
 778          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 779   %}
 780   ins_pipe(pipe_slow);
 781 %}
 782 
 783 // dst_src1 = -dst_src1 + -src2 * src3
 784 // dst_src1 = -dst_src1 + src2 * -src3
 785 instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
 786   predicate(UseFMA && UseSVE > 0);
 787   match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
 788   match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
 789   ins_cost(SVE_COST);
 790   format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
 791   ins_encode %{
 792     __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D,
 793          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 794   %}
 795   ins_pipe(pipe_slow);
 796 %}
 797 
 798 // vector fnmls
 799 
 800 // dst_src1 = -dst_src1 + src2 * src3
 801 instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
 802   predicate(UseFMA && UseSVE > 0);
 803   match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
 804   ins_cost(SVE_COST);
 805   format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
 806   ins_encode %{
 807     __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S,
 808          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 809   %}
 810   ins_pipe(pipe_slow);
 811 %}
 812 
 813 // dst_src1 = -dst_src1 + src2 * src3
 814 instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
 815   predicate(UseFMA && UseSVE > 0);
 816   match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
 817   ins_cost(SVE_COST);
 818   format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
 819   ins_encode %{
 820     __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D,
 821          ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 822   %}
 823   ins_pipe(pipe_slow);
 824 %}
 825 
 826 // vector mla
 827 
 828 // dst_src1 = dst_src1 + src2 * src3
 829 instruct vmlaB(vReg dst_src1, vReg src2, vReg src3)
 830 %{
 831   predicate(UseSVE > 0);
 832   match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
 833   ins_cost(SVE_COST);
 834   format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (B)" %}
 835   ins_encode %{
 836     __ sve_mla(as_FloatRegister($dst_src1$$reg), __ B,
 837       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 838   %}
 839   ins_pipe(pipe_slow);
 840 %}
 841 
 842 // dst_src1 = dst_src1 + src2 * src3
 843 instruct vmlaS(vReg dst_src1, vReg src2, vReg src3)
 844 %{
 845   predicate(UseSVE > 0);
 846   match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
 847   ins_cost(SVE_COST);
 848   format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %}
 849   ins_encode %{
 850     __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H,
 851       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 852   %}
 853   ins_pipe(pipe_slow);
 854 %}
 855 
 856 // dst_src1 = dst_src1 + src2 * src3
 857 instruct vmlaI(vReg dst_src1, vReg src2, vReg src3)
 858 %{
 859   predicate(UseSVE > 0);
 860   match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
 861   ins_cost(SVE_COST);
 862   format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %}
 863   ins_encode %{
 864     __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S,
 865       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 866   %}
 867   ins_pipe(pipe_slow);
 868 %}
 869 
 870 // dst_src1 = dst_src1 + src2 * src3
 871 instruct vmlaL(vReg dst_src1, vReg src2, vReg src3)
 872 %{
 873   predicate(UseSVE > 0);
 874   match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
 875   ins_cost(SVE_COST);
 876   format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %}
 877   ins_encode %{
 878     __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D,
 879       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 880   %}
 881   ins_pipe(pipe_slow);
 882 %}
 883 
 884 // vector mls
 885 
 886 // dst_src1 = dst_src1 - src2 * src3
 887 instruct vmlsB(vReg dst_src1, vReg src2, vReg src3)
 888 %{
 889   predicate(UseSVE > 0);
 890   match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
 891   ins_cost(SVE_COST);
 892   format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (B)" %}
 893   ins_encode %{
 894     __ sve_mls(as_FloatRegister($dst_src1$$reg), __ B,
 895       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 896   %}
 897   ins_pipe(pipe_slow);
 898 %}
 899 
 900 // dst_src1 = dst_src1 - src2 * src3
 901 instruct vmlsS(vReg dst_src1, vReg src2, vReg src3)
 902 %{
 903   predicate(UseSVE > 0);
 904   match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
 905   ins_cost(SVE_COST);
 906   format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %}
 907   ins_encode %{
 908     __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H,
 909       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 910   %}
 911   ins_pipe(pipe_slow);
 912 %}
 913 
 914 // dst_src1 = dst_src1 - src2 * src3
 915 instruct vmlsI(vReg dst_src1, vReg src2, vReg src3)
 916 %{
 917   predicate(UseSVE > 0);
 918   match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
 919   ins_cost(SVE_COST);
 920   format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %}
 921   ins_encode %{
 922     __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S,
 923       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 924   %}
 925   ins_pipe(pipe_slow);
 926 %}
 927 
 928 // dst_src1 = dst_src1 - src2 * src3
 929 instruct vmlsL(vReg dst_src1, vReg src2, vReg src3)
 930 %{
 931   predicate(UseSVE > 0);
 932   match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
 933   ins_cost(SVE_COST);
 934   format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %}
 935   ins_encode %{
 936     __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D,
 937       ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
 938   %}
 939   ins_pipe(pipe_slow);
 940 %}
 941 
 942 
 943 // vector mul
 944 
 945 instruct vmulB(vReg dst_src1, vReg src2) %{
 946   predicate(UseSVE > 0);
 947   match(Set dst_src1 (MulVB dst_src1 src2));
 948   ins_cost(SVE_COST);
 949   format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (B)" %}
 950   ins_encode %{
 951     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B,
 952          ptrue, as_FloatRegister($src2$$reg));
 953   %}
 954   ins_pipe(pipe_slow);
 955 %}
 956 
 957 instruct vmulS(vReg dst_src1, vReg src2) %{
 958   predicate(UseSVE > 0);
 959   match(Set dst_src1 (MulVS dst_src1 src2));
 960   ins_cost(SVE_COST);
 961   format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %}
 962   ins_encode %{
 963     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
 964          ptrue, as_FloatRegister($src2$$reg));
 965   %}
 966   ins_pipe(pipe_slow);
 967 %}
 968 
 969 instruct vmulI(vReg dst_src1, vReg src2) %{
 970   predicate(UseSVE > 0);
 971   match(Set dst_src1 (MulVI dst_src1 src2));
 972   ins_cost(SVE_COST);
 973   format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
 974   ins_encode %{
 975     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
 976          ptrue, as_FloatRegister($src2$$reg));
 977   %}
 978   ins_pipe(pipe_slow);
 979 %}
 980 
 981 instruct vmulL(vReg dst_src1, vReg src2) %{
 982   predicate(UseSVE > 0);
 983   match(Set dst_src1 (MulVL dst_src1 src2));
 984   ins_cost(SVE_COST);
 985   format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
 986   ins_encode %{
 987     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
 988          ptrue, as_FloatRegister($src2$$reg));
 989   %}
 990   ins_pipe(pipe_slow);
 991 %}
 992 
 993 instruct vmulF(vReg dst, vReg src1, vReg src2) %{
 994   predicate(UseSVE > 0);
 995   match(Set dst (MulVF src1 src2));
 996   ins_cost(SVE_COST);
 997   format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %}
 998   ins_encode %{
 999     __ sve_fmul(as_FloatRegister($dst$$reg), __ S,
1000          as_FloatRegister($src1$$reg),
1001          as_FloatRegister($src2$$reg));
1002   %}
1003   ins_pipe(pipe_slow);
1004 %}
1005 
1006 instruct vmulD(vReg dst, vReg src1, vReg src2) %{
1007   predicate(UseSVE > 0);
1008   match(Set dst (MulVD src1 src2));
1009   ins_cost(SVE_COST);
1010   format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %}
1011   ins_encode %{
1012     __ sve_fmul(as_FloatRegister($dst$$reg), __ D,
1013          as_FloatRegister($src1$$reg),
1014          as_FloatRegister($src2$$reg));
1015   %}
1016   ins_pipe(pipe_slow);
1017 %}
1018 
1019 // vector fneg
1020 
1021 instruct vnegF(vReg dst, vReg src) %{
1022   predicate(UseSVE > 0);
1023   match(Set dst (NegVF src));
1024   ins_cost(SVE_COST);
1025   format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
1026   ins_encode %{
1027     __ sve_fneg(as_FloatRegister($dst$$reg), __ S,
1028          ptrue, as_FloatRegister($src$$reg));
1029   %}
1030   ins_pipe(pipe_slow);
1031 %}
1032 
1033 instruct vnegD(vReg dst, vReg src) %{
1034   predicate(UseSVE > 0);
1035   match(Set dst (NegVD src));
1036   ins_cost(SVE_COST);
1037   format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
1038   ins_encode %{
1039     __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
1040          ptrue, as_FloatRegister($src$$reg));
1041   %}
1042   ins_pipe(pipe_slow);
1043 %}
1044 
1045 // popcount vector
1046 
1047 instruct vpopcountI(vReg dst, vReg src) %{
1048   predicate(UseSVE > 0);
1049   match(Set dst (PopCountVI src));
1050   format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
1051   ins_encode %{
1052      __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
1053   %}
1054   ins_pipe(pipe_slow);
1055 %}
1056 
1057 // vector mask compare
1058 
1059 instruct vmaskcmp(vReg dst, vReg src1, vReg src2, immI cond, pRegGov pTmp, rFlagsReg cr) %{
1060   predicate(UseSVE > 0);
1061   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
1062   effect(TEMP pTmp, KILL cr);
1063   ins_cost(2 * SVE_COST);
1064   format %{ "sve_cmp $pTmp, $src1, $src2\n\t"
1065             "sve_cpy $dst, $pTmp, -1\t# vector mask cmp (sve)" %}
1066   ins_encode %{
1067     BasicType bt = Matcher::vector_element_basic_type(this);
1068     __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src1$$reg),
1069                    as_FloatRegister($src2$$reg), (int)$cond$$constant);
1070     __ sve_cpy(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
1071                as_PRegister($pTmp$$reg), -1, false);
1072   %}
1073   ins_pipe(pipe_slow);
1074 %}
1075 
1076 // vector blend
1077 
1078 instruct vblend(vReg dst, vReg src1, vReg src2, vReg src3, pRegGov pTmp, rFlagsReg cr) %{
1079   predicate(UseSVE > 0);
1080   match(Set dst (VectorBlend (Binary src1 src2) src3));
1081   effect(TEMP pTmp, KILL cr);
1082   ins_cost(2 * SVE_COST);
1083   format %{ "sve_cmpeq $pTmp, $src3, -1\n\t"
1084             "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
1085   ins_encode %{
1086     Assembler::SIMD_RegVariant size =
1087       __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
1088     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
1089                ptrue, as_FloatRegister($src3$$reg), -1);
1090     __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg),
1091                as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
1092   %}
1093   ins_pipe(pipe_slow);
1094 %}
1095 
1096 // vector blend with compare
1097 
1098 instruct vblend_maskcmp(vReg dst, vReg src1, vReg src2, vReg src3,
1099                         vReg src4, pRegGov pTmp, immI cond, rFlagsReg cr) %{
1100   predicate(UseSVE > 0);
1101   match(Set dst (VectorBlend (Binary src1 src2) (VectorMaskCmp (Binary src3 src4) cond)));
1102   effect(TEMP pTmp, KILL cr);
1103   ins_cost(2 * SVE_COST);
1104   format %{ "sve_cmp $pTmp, $src3, $src4\t# vector cmp (sve)\n\t"
1105             "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
1106   ins_encode %{
1107     BasicType bt = Matcher::vector_element_basic_type(this);
1108     __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src3$$reg),
1109                    as_FloatRegister($src4$$reg), (int)$cond$$constant);
1110     __ sve_sel(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
1111                as_PRegister($pTmp$$reg), as_FloatRegister($src2$$reg),
1112                as_FloatRegister($src1$$reg));
1113   %}
1114   ins_pipe(pipe_slow);
1115 %}
1116 
1117 // vector load mask
1118 
1119 instruct vloadmaskB(vReg dst, vReg src) %{
1120   predicate(UseSVE > 0 &&
1121             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
1122   match(Set dst (VectorLoadMask src));
1123   ins_cost(SVE_COST);
1124   format %{ "sve_neg $dst, $src\t# vector load mask (B)" %}
1125   ins_encode %{
1126     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, as_FloatRegister($src$$reg));
1127   %}
1128   ins_pipe(pipe_slow);
1129 %}
1130 
1131 instruct vloadmaskS(vReg dst, vReg src) %{
1132   predicate(UseSVE > 0 &&
1133             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
1134   match(Set dst (VectorLoadMask src));
1135   ins_cost(2 * SVE_COST);
1136   format %{ "sve_uunpklo $dst, H, $src\n\t"
1137             "sve_neg $dst, $dst\t# vector load mask (B to H)" %}
1138   ins_encode %{
1139     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
1140     __ sve_neg(as_FloatRegister($dst$$reg), __ H, ptrue, as_FloatRegister($dst$$reg));
1141   %}
1142   ins_pipe(pipe_slow);
1143 %}
1144 
1145 instruct vloadmaskI(vReg dst, vReg src) %{
1146   predicate(UseSVE > 0 &&
1147             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
1148              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
1149   match(Set dst (VectorLoadMask src));
1150   ins_cost(3 * SVE_COST);
1151   format %{ "sve_uunpklo $dst, H, $src\n\t"
1152             "sve_uunpklo $dst, S, $dst\n\t"
1153             "sve_neg $dst, $dst\t# vector load mask (B to S)" %}
1154   ins_encode %{
1155     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
1156     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
1157     __ sve_neg(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg));
1158   %}
1159   ins_pipe(pipe_slow);
1160 %}
1161 
1162 instruct vloadmaskL(vReg dst, vReg src) %{
1163   predicate(UseSVE > 0 &&
1164             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
1165              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
1166   match(Set dst (VectorLoadMask src));
1167   ins_cost(4 * SVE_COST);
1168   format %{ "sve_uunpklo $dst, H, $src\n\t"
1169             "sve_uunpklo $dst, S, $dst\n\t"
1170             "sve_uunpklo $dst, D, $dst\n\t"
1171             "sve_neg $dst, $dst\t# vector load mask (B to D)" %}
1172   ins_encode %{
1173     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
1174     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
1175     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
1176     __ sve_neg(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg));
1177   %}
1178   ins_pipe(pipe_slow);
1179 %}
1180 
1181 // vector store mask
1182 
1183 instruct vstoremaskB(vReg dst, vReg src, immI_1 size) %{
1184   predicate(UseSVE > 0);
1185   match(Set dst (VectorStoreMask src size));
1186   ins_cost(SVE_COST);
1187   format %{ "sve_neg $dst, $src\t# vector store mask (B)" %}
1188   ins_encode %{
1189     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
1190                as_FloatRegister($src$$reg));
1191   %}
1192   ins_pipe(pipe_slow);
1193 %}
1194 
1195 instruct vstoremaskS(vReg dst, vReg src, vReg tmp, immI_2 size) %{
1196   predicate(UseSVE > 0);
1197   match(Set dst (VectorStoreMask src size));
1198   effect(TEMP_DEF dst, TEMP tmp);
1199   ins_cost(3 * SVE_COST);
1200   format %{ "sve_dup $tmp, H, 0\n\t"
1201             "sve_uzp1 $dst, B, $src, $tmp\n\t"
1202             "sve_neg $dst, B, $dst\t# vector store mask (sve) (H to B)" %}
1203   ins_encode %{
1204     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
1205     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
1206                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
1207     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
1208                as_FloatRegister($dst$$reg));
1209 
1210   %}
1211   ins_pipe(pipe_slow);
1212 %}
1213 
1214 instruct vstoremaskI(vReg dst, vReg src, vReg tmp, immI_4 size) %{
1215   predicate(UseSVE > 0);
1216   match(Set dst (VectorStoreMask src size));
1217   effect(TEMP_DEF dst, TEMP tmp);
1218   ins_cost(4 * SVE_COST);
1219   format %{ "sve_dup $tmp, S, 0\n\t"
1220             "sve_uzp1 $dst, H, $src, $tmp\n\t"
1221             "sve_uzp1 $dst, B, $dst, $tmp\n\t"
1222             "sve_neg $dst, B, $dst\t# vector store mask (sve) (S to B)" %}
1223   ins_encode %{
1224     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
1225     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
1226                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
1227     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
1228                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1229     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
1230                as_FloatRegister($dst$$reg));
1231   %}
1232   ins_pipe(pipe_slow);
1233 %}
1234 
1235 instruct vstoremaskL(vReg dst, vReg src, vReg tmp, immI_8 size) %{
1236   predicate(UseSVE > 0);
1237   match(Set dst (VectorStoreMask src size));
1238   effect(TEMP_DEF dst, TEMP tmp);
1239   ins_cost(5 * SVE_COST);
1240   format %{ "sve_dup $tmp, D, 0\n\t"
1241             "sve_uzp1 $dst, S, $src, $tmp\n\t"
1242             "sve_uzp1 $dst, H, $dst, $tmp\n\t"
1243             "sve_uzp1 $dst, B, $dst, $tmp\n\t"
1244             "sve_neg $dst, B, $dst\t# vector store mask (sve) (D to B)" %}
1245   ins_encode %{
1246     __ sve_dup(as_FloatRegister($tmp$$reg), __ D, 0);
1247     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S,
1248                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
1249     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
1250                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1251     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
1252                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
1253     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
1254                as_FloatRegister($dst$$reg));
1255   %}
1256   ins_pipe(pipe_slow);
1257 %}
1258 
1259 // load/store mask vector
1260 
1261 instruct vloadmask_loadV_byte(vReg dst, vmemA mem) %{
1262   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize &&
1263             type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) == 1);
1264   match(Set dst (VectorLoadMask (LoadVector mem)));
1265   ins_cost(5 * SVE_COST);
1266   format %{ "sve_ld1b $dst, $mem\n\t"
1267             "sve_neg $dst, $dst\t# load vector mask (sve)" %}
1268   ins_encode %{
1269     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
1270     BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
1271     Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt);
1272     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
1273                           T_BOOLEAN, to_vect_bt, $mem->opcode(),
1274                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
1275     __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg);
1276   %}
1277   ins_pipe(pipe_slow);
1278 %}
1279 
1280 instruct vloadmask_loadV_non_byte(vReg dst, indirect mem) %{
1281   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize &&
1282             type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
1283   match(Set dst (VectorLoadMask (LoadVector mem)));
1284   ins_cost(5 * SVE_COST);
1285   format %{ "sve_ld1b $dst, $mem\n\t"
1286             "sve_neg $dst, $dst\t# load vector mask (sve)" %}
1287   ins_encode %{
1288     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
1289     BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
1290     Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt);
1291     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
1292                           T_BOOLEAN, to_vect_bt, $mem->opcode(),
1293                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
1294     __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg);
1295   %}
1296   ins_pipe(pipe_slow);
1297 %}
1298 
1299 instruct storeV_vstoremask_byte(vmemA mem, vReg src, vReg tmp, immI_1 esize) %{
1300   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() *
1301                           n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize);
1302   match(Set mem (StoreVector mem (VectorStoreMask src esize)));
1303   effect(TEMP tmp);
1304   ins_cost(5 * SVE_COST);
1305   format %{ "sve_neg $tmp, $src\n\t"
1306             "sve_st1b $tmp, $mem\t# store vector mask (sve)" %}
1307   ins_encode %{
1308     BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
1309     assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type.");
1310     Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant);
1311     __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue,
1312                as_FloatRegister($src$$reg));
1313     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg),
1314                           ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(),
1315                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
1316   %}
1317   ins_pipe(pipe_slow);
1318 %}
1319 
1320 instruct storeV_vstoremask_non_byte(indirect mem, vReg src, vReg tmp, immI_gt_1 esize) %{
1321   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() *
1322                           n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize);
1323   match(Set mem (StoreVector mem (VectorStoreMask src esize)));
1324   effect(TEMP tmp);
1325   ins_cost(5 * SVE_COST);
1326   format %{ "sve_neg $tmp, $src\n\t"
1327             "sve_st1b $tmp, $mem\t# store vector mask (sve)" %}
1328   ins_encode %{
1329     BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
1330     assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type.");
1331     Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant);
1332     __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue,
1333                as_FloatRegister($src$$reg));
1334     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg),
1335                           ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(),
1336                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
1337   %}
1338   ins_pipe(pipe_slow);
1339 %}
1340 
1341 // vector add reduction
1342 
1343 instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1344   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1345   match(Set dst (AddReductionVI src1 src2));
1346   effect(TEMP_DEF dst, TEMP vtmp);
1347   ins_cost(SVE_COST);
1348   format %{ "sve_reduce_addI $dst, $src1, $src2\t# addB/S/I reduction (sve) (may extend)" %}
1349   ins_encode %{
1350     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1351     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1352     __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1353     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1354     __ addw($dst$$Register, $dst$$Register, $src1$$Register);
1355     if (bt == T_BYTE) {
1356       __ sxtb($dst$$Register, $dst$$Register);
1357     } else if (bt == T_SHORT) {
1358       __ sxth($dst$$Register, $dst$$Register);
1359     } else {
1360       assert(bt == T_INT, "unsupported type");
1361     }
1362   %}
1363   ins_pipe(pipe_slow);
1364 %}
1365 
1366 instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1367                              pRegGov ptmp, rFlagsReg cr) %{
1368   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1369   match(Set dst (AddReductionVI src1 src2));
1370   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1371   ins_cost(SVE_COST);
1372   format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %}
1373   ins_encode %{
1374     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1375     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1376     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1377                           Matcher::vector_length(this, $src2));
1378     __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant,
1379                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1380     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1381     __ addw($dst$$Register, $dst$$Register, $src1$$Register);
1382     if (bt == T_BYTE) {
1383       __ sxtb($dst$$Register, $dst$$Register);
1384     } else if (bt == T_SHORT) {
1385       __ sxth($dst$$Register, $dst$$Register);
1386     } else {
1387       assert(bt == T_INT, "unsupported type");
1388     }
1389   %}
1390   ins_pipe(pipe_slow);
1391 %}
1392 
1393 instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1394   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1395   match(Set dst (AddReductionVL src1 src2));
1396   effect(TEMP_DEF dst, TEMP vtmp);
1397   ins_cost(SVE_COST);
1398   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %}
1399   ins_encode %{
1400     __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1401     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1402     __ add($dst$$Register, $dst$$Register, $src1$$Register);
1403   %}
1404   ins_pipe(pipe_slow);
1405 %}
1406 
1407 instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1408                              pRegGov ptmp, rFlagsReg cr) %{
1409   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1410   match(Set dst (AddReductionVL src1 src2));
1411   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1412   ins_cost(SVE_COST);
1413   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %}
1414   ins_encode %{
1415     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1416                           Matcher::vector_length(this, $src2));
1417     __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D,
1418                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1419     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1420     __ add($dst$$Register, $dst$$Register, $src1$$Register);
1421   %}
1422   ins_pipe(pipe_slow);
1423 %}
1424 
1425 
1426 instruct reduce_addF(vRegF src1_dst, vReg src2) %{
1427   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1428   match(Set src1_dst (AddReductionVF src1_dst src2));
1429   ins_cost(SVE_COST);
1430   format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
1431   ins_encode %{
1432     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
1433          ptrue, as_FloatRegister($src2$$reg));
1434   %}
1435   ins_pipe(pipe_slow);
1436 %}
1437 
1438 instruct reduce_addF_partial(vRegF src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
1439   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1440   match(Set src1_dst (AddReductionVF src1_dst src2));
1441   ins_cost(SVE_COST);
1442   effect(TEMP ptmp, KILL cr);
1443   format %{ "sve_reduce_addF $src1_dst, $src1_dst, $src2\t# addF reduction partial (sve) (S)" %}
1444   ins_encode %{
1445     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
1446                           Matcher::vector_length(this, $src2));
1447     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
1448                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1449   %}
1450   ins_pipe(pipe_slow);
1451 %}
1452 
1453 instruct reduce_addD(vRegD src1_dst, vReg src2) %{
1454   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1455   match(Set src1_dst (AddReductionVD src1_dst src2));
1456   ins_cost(SVE_COST);
1457   format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
1458   ins_encode %{
1459     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
1460          ptrue, as_FloatRegister($src2$$reg));
1461   %}
1462   ins_pipe(pipe_slow);
1463 %}
1464 
1465 instruct reduce_addD_partial(vRegD src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
1466   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1467   match(Set src1_dst (AddReductionVD src1_dst src2));
1468   ins_cost(SVE_COST);
1469   effect(TEMP ptmp, KILL cr);
1470   format %{ "sve_reduce_addD $src1_dst, $src1_dst, $src2\t# addD reduction partial (sve) (D)" %}
1471   ins_encode %{
1472     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1473                           Matcher::vector_length(this, $src2));
1474     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
1475                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1476   %}
1477   ins_pipe(pipe_slow);
1478 %}
1479 
1480 // vector and reduction
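// Bitwise and/or/xor reductions below follow the same shape as the add reductions
// above: reduce into a temporary with sve_andv/sve_orv/sve_eorv, extract lane 0
// (smov for B/S/I, umov for L), and combine the result with the scalar input.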
1481 
1482 instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1483   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1484             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1485   match(Set dst (AndReductionV src1 src2));
1486   effect(TEMP_DEF dst, TEMP vtmp);
1487   ins_cost(SVE_COST);
1488   format %{ "sve_reduce_andI $dst, $src1, $src2\t# andB/S/I reduction (sve) (may extend)" %}
1489   ins_encode %{
1490     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1491     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1492     __ sve_andv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1493     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1494     __ andw($dst$$Register, $dst$$Register, $src1$$Register);
1495     if (bt == T_BYTE) {
1496       __ sxtb($dst$$Register, $dst$$Register);
1497     } else if (bt == T_SHORT) {
1498       __ sxth($dst$$Register, $dst$$Register);
1499     } else {
1500       assert(bt == T_INT, "unsupported type");
1501     }
1502   %}
1503   ins_pipe(pipe_slow);
1504 %}
1505 
1506 instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1507                              pRegGov ptmp, rFlagsReg cr) %{
1508   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1509             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1510   match(Set dst (AndReductionV src1 src2));
1511   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1512   ins_cost(SVE_COST);
1513   format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %}
1514   ins_encode %{
1515     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1516     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1517     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1518                           Matcher::vector_length(this, $src2));
1519     __ sve_andv(as_FloatRegister($vtmp$$reg), variant,
1520                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1521     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1522     __ andw($dst$$Register, $dst$$Register, $src1$$Register);
1523     if (bt == T_BYTE) {
1524       __ sxtb($dst$$Register, $dst$$Register);
1525     } else if (bt == T_SHORT) {
1526       __ sxth($dst$$Register, $dst$$Register);
1527     } else {
1528       assert(bt == T_INT, "unsupported type");
1529     }
1530   %}
1531   ins_pipe(pipe_slow);
1532 %}
1533 
1534 instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1535   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1536             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1537   match(Set dst (AndReductionV src1 src2));
1538   effect(TEMP_DEF dst, TEMP vtmp);
1539   ins_cost(SVE_COST);
1540   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %}
1541   ins_encode %{
1542     __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1543     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1544     __ andr($dst$$Register, $dst$$Register, $src1$$Register);
1545   %}
1546   ins_pipe(pipe_slow);
1547 %}
1548 
1549 instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1550                              pRegGov ptmp, rFlagsReg cr) %{
1551   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1552             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1553   match(Set dst (AndReductionV src1 src2));
1554   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1555   ins_cost(SVE_COST);
1556   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %}
1557   ins_encode %{
1558     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1559                           Matcher::vector_length(this, $src2));
1560     __ sve_andv(as_FloatRegister($vtmp$$reg), __ D,
1561                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1562     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1563     __ andr($dst$$Register, $dst$$Register, $src1$$Register);
1564   %}
1565   ins_pipe(pipe_slow);
1566 %}
1567 
1568 // vector or reduction
1569 
1570 instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1571   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1572             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1573   match(Set dst (OrReductionV src1 src2));
1574   effect(TEMP_DEF dst, TEMP vtmp);
1575   ins_cost(SVE_COST);
1576   format %{ "sve_reduce_orI $dst, $src1, $src2\t# orB/S/I reduction (sve) (may extend)" %}
1577   ins_encode %{
1578     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1579     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1580     __ sve_orv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1581     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1582     __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
1583     if (bt == T_BYTE) {
1584       __ sxtb($dst$$Register, $dst$$Register);
1585     } else if (bt == T_SHORT) {
1586       __ sxth($dst$$Register, $dst$$Register);
1587     } else {
1588       assert(bt == T_INT, "unsupported type");
1589     }
1590   %}
1591   ins_pipe(pipe_slow);
1592 %}
1593 
1594 instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1595                              pRegGov ptmp, rFlagsReg cr) %{
1596   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1597             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1598   match(Set dst (OrReductionV src1 src2));
1599   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1600   ins_cost(SVE_COST);
1601   format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %}
1602   ins_encode %{
1603     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1604     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1605     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1606                           Matcher::vector_length(this, $src2));
1607     __ sve_orv(as_FloatRegister($vtmp$$reg), variant,
1608                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1609     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1610     __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
1611     if (bt == T_BYTE) {
1612       __ sxtb($dst$$Register, $dst$$Register);
1613     } else if (bt == T_SHORT) {
1614       __ sxth($dst$$Register, $dst$$Register);
1615     } else {
1616       assert(bt == T_INT, "unsupported type");
1617     }
1618   %}
1619   ins_pipe(pipe_slow);
1620 %}
1621 
1622 instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1623   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1624             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1625   match(Set dst (OrReductionV src1 src2));
1626   effect(TEMP_DEF dst, TEMP vtmp);
1627   ins_cost(SVE_COST);
1628   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %}
1629   ins_encode %{
1630     __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1631     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1632     __ orr($dst$$Register, $dst$$Register, $src1$$Register);
1633   %}
1634   ins_pipe(pipe_slow);
1635 %}
1636 
1637 instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1638                              pRegGov ptmp, rFlagsReg cr) %{
1639   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1640             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1641   match(Set dst (OrReductionV src1 src2));
1642   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1643   ins_cost(SVE_COST);
1644   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %}
1645   ins_encode %{
1646     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1647                           Matcher::vector_length(this, $src2));
1648     __ sve_orv(as_FloatRegister($vtmp$$reg), __ D,
1649                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1650     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1651     __ orr($dst$$Register, $dst$$Register, $src1$$Register);
1652   %}
1653   ins_pipe(pipe_slow);
1654 %}
1655 
1656 // vector xor reduction
1657 
1658 instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1659   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1660             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1661   match(Set dst (XorReductionV src1 src2));
1662   effect(TEMP_DEF dst, TEMP vtmp);
1663   ins_cost(SVE_COST);
1664   format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorB/H/I reduction (sve) (may extend)" %}
1665   ins_encode %{
1666     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1667     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1668     __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1669     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1670     __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
1671     if (bt == T_BYTE) {
1672       __ sxtb($dst$$Register, $dst$$Register);
1673     } else if (bt == T_SHORT) {
1674       __ sxth($dst$$Register, $dst$$Register);
1675     } else {
1676       assert(bt == T_INT, "unsupported type");
1677     }
1678   %}
1679   ins_pipe(pipe_slow);
1680 %}
1681 
1682 instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1683                              pRegGov ptmp, rFlagsReg cr) %{
1684   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
1685             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1686   match(Set dst (XorReductionV src1 src2));
1687   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1688   ins_cost(SVE_COST);
1689   format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorI reduction partial (sve) (may extend)" %}
1690   ins_encode %{
1691     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1692     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1693     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1694                           Matcher::vector_length(this, $src2));
1695     __ sve_eorv(as_FloatRegister($vtmp$$reg), variant,
1696                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1697     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1698     __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
1699     if (bt == T_BYTE) {
1700       __ sxtb($dst$$Register, $dst$$Register);
1701     } else if (bt == T_SHORT) {
1702       __ sxth($dst$$Register, $dst$$Register);
1703     } else {
1704       assert(bt == T_INT, "unsupported type");
1705     }
1706   %}
1707   ins_pipe(pipe_slow);
1708 %}
1709 
1710 instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1711   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1712             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1713   match(Set dst (XorReductionV src1 src2));
1714   effect(TEMP_DEF dst, TEMP vtmp);
1715   ins_cost(SVE_COST);
1716   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction (sve)" %}
1717   ins_encode %{
1718     __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1719     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1720     __ eor($dst$$Register, $dst$$Register, $src1$$Register);
1721   %}
1722   ins_pipe(pipe_slow);
1723 %}
1724 
1725 instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1726                              pRegGov ptmp, rFlagsReg cr) %{
1727   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
1728             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1729   match(Set dst (XorReductionV src1 src2));
1730   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1731   ins_cost(SVE_COST);
1732   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction partial (sve)" %}
1733   ins_encode %{
1734     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1735                           Matcher::vector_length(this, $src2));
1736     __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D,
1737                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1738     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1739     __ eor($dst$$Register, $dst$$Register, $src1$$Register);
1740   %}
1741   ins_pipe(pipe_slow);
1742 %}
1743 
1744 
1745 // vector max reduction
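// Integer max reductions reduce the vector with sve_smaxv, extract lane 0 with
// smov/umov, then fold in the scalar input via cmp + csel (condition GT). The
// float/double variants use sve_fmaxv followed by a scalar fmaxs/fmaxd with src1.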
1746 
1747 instruct reduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1748   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1749             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
1750              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
1751              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
1752   match(Set dst (MaxReductionV src1 src2));
1753   effect(TEMP_DEF dst, TEMP vtmp);
1754   ins_cost(SVE_COST);
1755   format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxB/S/I (sve)" %}
1756   ins_encode %{
1757     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1758     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1759     __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1760     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1761     __ cmpw($dst$$Register, $src1$$Register);
1762     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
1763   %}
1764   ins_pipe(pipe_slow);
1765 %}
1766 
1767 instruct reduce_maxI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1768                              pRegGov ptmp, rFlagsReg cr) %{
1769   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1770             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
1771              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
1772              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
1773   match(Set dst (MaxReductionV src1 src2));
1774   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1775   ins_cost(SVE_COST);
1776   format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxI partial (sve)" %}
1777   ins_encode %{
1778     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1779     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1780     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1781                           Matcher::vector_length(this, $src2));
1782     __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant,
1783                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1784     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1785     __ cmpw($dst$$Register, $src1$$Register);
1786     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
1787   %}
1788   ins_pipe(pipe_slow);
1789 %}
1790 
1791 instruct reduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1792   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1793             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1794   match(Set dst (MaxReductionV src1 src2));
1795   effect(TEMP_DEF dst, TEMP vtmp);
1796   ins_cost(SVE_COST);
1797   format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL partial (sve)" %}
1798   ins_encode %{
1799     __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1800     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1801     __ cmp($dst$$Register, $src1$$Register);
1802     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
1803   %}
1804   ins_pipe(pipe_slow);
1805 %}
1806 
1807 instruct reduce_maxL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1808                              pRegGov ptmp, rFlagsReg cr) %{
1809   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1810             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1811   match(Set dst (MaxReductionV src1 src2));
1812   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1813   ins_cost(SVE_COST);
1814   format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL partial (sve)" %}
1815   ins_encode %{
1816     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1817                           Matcher::vector_length(this, $src2));
1818     __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D,
1819                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1820     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1821     __ cmp($dst$$Register, $src1$$Register);
1822     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
1823   %}
1824   ins_pipe(pipe_slow);
1825 %}
1826 
1827 instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{
1828   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
1829             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1830   match(Set dst (MaxReductionV src1 src2));
1831   ins_cost(INSN_COST);
1832   effect(TEMP_DEF dst);
1833   format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t"
1834             "fmaxs $dst, $dst, $src1\t# max reduction F" %}
1835   ins_encode %{
1836     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
1837          ptrue, as_FloatRegister($src2$$reg));
1838     __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1839   %}
1840   ins_pipe(pipe_slow);
1841 %}
1842 
1843 instruct reduce_maxF_partial(vRegF dst, vRegF src1, vReg src2,
1844                              pRegGov ptmp, rFlagsReg cr) %{
1845   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
1846             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1847   match(Set dst (MaxReductionV src1 src2));
1848   ins_cost(INSN_COST);
1849   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
1850   format %{ "sve_reduce_maxF $dst, $src1, $src2\t# reduce max S partial (sve)" %}
1851   ins_encode %{
1852     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
1853                           Matcher::vector_length(this, $src2));
1854     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
1855          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1856     __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1857   %}
1858   ins_pipe(pipe_slow);
1859 %}
1860 
1861 instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{
1862   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
1863             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1864   match(Set dst (MaxReductionV src1 src2));
1865   ins_cost(INSN_COST);
1866   effect(TEMP_DEF dst);
1867   format %{ "sve_fmaxv $dst, $src2 # vector (sve) (D)\n\t"
1868             "fmaxd $dst, $dst, $src1\t# max reduction D" %}
1869   ins_encode %{
1870     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
1871          ptrue, as_FloatRegister($src2$$reg));
1872     __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1873   %}
1874   ins_pipe(pipe_slow);
1875 %}
1876 
1877 instruct reduce_maxD_partial(vRegD dst, vRegD src1, vReg src2,
1878                              pRegGov ptmp, rFlagsReg cr) %{
1879   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
1880             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1881   match(Set dst (MaxReductionV src1 src2));
1882   ins_cost(INSN_COST);
1883   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
1884   format %{ "sve_reduce_maxD $dst, $src1, $src2\t# reduce max D partial (sve)" %}
1885   ins_encode %{
1886     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1887                           Matcher::vector_length(this, $src2));
1888     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
1889          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1890     __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1891   %}
1892   ins_pipe(pipe_slow);
1893 %}
1894 
1895 // vector min reduction
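// Same structure as the max reductions above, selecting with condition LT and
// using sve_sminv/sve_fminv plus fmins/fmind instead.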
1896 
1897 instruct reduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
1898   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1899             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
1900              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
1901              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
1902   match(Set dst (MinReductionV src1 src2));
1903   effect(TEMP_DEF dst, TEMP vtmp);
1904   ins_cost(SVE_COST);
1905   format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minB/S/I (sve)" %}
1906   ins_encode %{
1907     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1908     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1909     __ sve_sminv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
1910     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1911     __ cmpw($dst$$Register, $src1$$Register);
1912     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
1913   %}
1914   ins_pipe(pipe_slow);
1915 %}
1916 
1917 instruct reduce_minI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
1918                              pRegGov ptmp, rFlagsReg cr) %{
1919   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1920             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
1921              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
1922              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
1923   match(Set dst (MinReductionV src1 src2));
1924   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1925   ins_cost(SVE_COST);
1926   format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minI partial (sve)" %}
1927   ins_encode %{
1928     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
1929     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
1930     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
1931                           Matcher::vector_length(this, $src2));
1932     __ sve_sminv(as_FloatRegister($vtmp$$reg), variant,
1933                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1934     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
1935     __ cmpw($dst$$Register, $src1$$Register);
1936     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
1937   %}
1938   ins_pipe(pipe_slow);
1939 %}
1940 
1941 instruct reduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
1942   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
1943             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1944   match(Set dst (MinReductionV src1 src2));
1945   effect(TEMP_DEF dst, TEMP vtmp);
1946   ins_cost(SVE_COST);
1947   format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %}
1948   ins_encode %{
1949     __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
1950     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1951     __ cmp($dst$$Register, $src1$$Register);
1952     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
1953   %}
1954   ins_pipe(pipe_slow);
1955 %}
1956 
1957 instruct reduce_minL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
1958                              pRegGov ptmp, rFlagsReg cr) %{
1959   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
1960             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1961   match(Set dst (MinReductionV src1 src2));
1962   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
1963   ins_cost(SVE_COST);
1964   format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %}
1965   ins_encode %{
1966     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
1967                           Matcher::vector_length(this, $src2));
1968     __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D,
1969                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
1970     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
1971     __ cmp($dst$$Register, $src1$$Register);
1972     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
1973   %}
1974   ins_pipe(pipe_slow);
1975 %}
1976 
1977 instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{
1978   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
1979             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
1980   match(Set dst (MinReductionV src1 src2));
1981   ins_cost(INSN_COST);
1982   effect(TEMP_DEF dst);
1983   format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t"
1984             "fmins $dst, $dst, $src1\t# min reduction F" %}
1985   ins_encode %{
1986     __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
1987          ptrue, as_FloatRegister($src2$$reg));
1988     __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
1989   %}
1990   ins_pipe(pipe_slow);
1991 %}
1992 
1993 instruct reduce_minF_partial(vRegF dst, vRegF src1, vReg src2,
1994                              pRegGov ptmp, rFlagsReg cr) %{
1995   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
1996             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
1997   match(Set dst (MinReductionV src1 src2));
1998   ins_cost(INSN_COST);
1999   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
2000   format %{ "sve_reduce_minF $dst, $src1, $src2\t# reduce min S partial (sve)" %}
2001   ins_encode %{
2002     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
2003                           Matcher::vector_length(this, $src2));
2004     __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
2005          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
2006     __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
2007   %}
2008   ins_pipe(pipe_slow);
2009 %}
2010 
2011 instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{
2012   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
2013             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
2014   match(Set dst (MinReductionV src1 src2));
2015   ins_cost(INSN_COST);
2016   effect(TEMP_DEF dst);
2017   format %{ "sve_fminv $dst, $src2 # vector (sve) (D)\n\t"
2018             "fmind $dst, $dst, $src1\t# min reduction D" %}
2019   ins_encode %{
2020     __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
2021          ptrue, as_FloatRegister($src2$$reg));
2022     __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
2023   %}
2024   ins_pipe(pipe_slow);
2025 %}
2026 
2027 instruct reduce_minD_partial(vRegD dst, vRegD src1, vReg src2,
2028                              pRegGov ptmp, rFlagsReg cr) %{
2029   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
2030             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
2031   match(Set dst (MinReductionV src1 src2));
2032   ins_cost(INSN_COST);
2033   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
2034   format %{ "sve_reduce_minD $dst, $src1, $src2\t# reduce min D partial (sve)" %}
2035   ins_encode %{
2036     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
2037                           Matcher::vector_length(this, $src2));
2038     __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
2039          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
2040     __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
2041   %}
2042   ins_pipe(pipe_slow);
2043 %}
2044 
2045 // vector Math.rint, floor, ceil
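// RoundDoubleModeV maps each rounding mode onto a predicated SVE round instruction:
// rmode_rint -> sve_frintn (nearest, ties to even), rmode_floor -> sve_frintm
// (toward minus infinity), rmode_ceil -> sve_frintp (toward plus infinity).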
2046 
2047 instruct vroundD(vReg dst, vReg src, immI rmode) %{
2048   predicate(UseSVE > 0 &&
2049             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
2050   match(Set dst (RoundDoubleModeV src rmode));
2051   format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
2052   ins_encode %{
2053     switch ($rmode$$constant) {
2054       case RoundDoubleModeNode::rmode_rint:
2055         __ sve_frintn(as_FloatRegister($dst$$reg), __ D,
2056              ptrue, as_FloatRegister($src$$reg));
2057         break;
2058       case RoundDoubleModeNode::rmode_floor:
2059         __ sve_frintm(as_FloatRegister($dst$$reg), __ D,
2060              ptrue, as_FloatRegister($src$$reg));
2061         break;
2062       case RoundDoubleModeNode::rmode_ceil:
2063         __ sve_frintp(as_FloatRegister($dst$$reg), __ D,
2064              ptrue, as_FloatRegister($src$$reg));
2065         break;
2066     }
2067   %}
2068   ins_pipe(pipe_slow);
2069 %}
2070 
2071 // vector replicate
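// Broadcasts from a general-purpose register or a small immediate use sve_dup,
// while broadcasts from a floating-point register (ReplicateF/ReplicateD) use a
// predicated sve_cpy under ptrue.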
2072 
2073 instruct replicateB(vReg dst, iRegIorL2I src) %{
2074   predicate(UseSVE > 0);
2075   match(Set dst (ReplicateB src));
2076   ins_cost(SVE_COST);
2077   format %{ "sve_dup  $dst, $src\t# vector (sve) (B)" %}
2078   ins_encode %{
2079     __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg));
2080   %}
2081   ins_pipe(pipe_slow);
2082 %}
2083 
2084 instruct replicateS(vReg dst, iRegIorL2I src) %{
2085   predicate(UseSVE > 0);
2086   match(Set dst (ReplicateS src));
2087   ins_cost(SVE_COST);
2088   format %{ "sve_dup  $dst, $src\t# vector (sve) (H)" %}
2089   ins_encode %{
2090     __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg));
2091   %}
2092   ins_pipe(pipe_slow);
2093 %}
2094 
2095 instruct replicateI(vReg dst, iRegIorL2I src) %{
2096   predicate(UseSVE > 0);
2097   match(Set dst (ReplicateI src));
2098   ins_cost(SVE_COST);
2099   format %{ "sve_dup  $dst, $src\t# vector (sve) (S)" %}
2100   ins_encode %{
2101     __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg));
2102   %}
2103   ins_pipe(pipe_slow);
2104 %}
2105 
2106 instruct replicateL(vReg dst, iRegL src) %{
2107   predicate(UseSVE > 0);
2108   match(Set dst (ReplicateL src));
2109   ins_cost(SVE_COST);
2110   format %{ "sve_dup  $dst, $src\t# vector (sve) (D)" %}
2111   ins_encode %{
2112     __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg));
2113   %}
2114   ins_pipe(pipe_slow);
2115 %}
2116 
2117 instruct replicateB_imm8(vReg dst, immI8 con) %{
2118   predicate(UseSVE > 0);
2119   match(Set dst (ReplicateB con));
2120   ins_cost(SVE_COST);
2121   format %{ "sve_dup  $dst, $con\t# vector (sve) (B)" %}
2122   ins_encode %{
2123     __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant);
2124   %}
2125   ins_pipe(pipe_slow);
2126 %}
2127 
2128 instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{
2129   predicate(UseSVE > 0);
2130   match(Set dst (ReplicateS con));
2131   ins_cost(SVE_COST);
2132   format %{ "sve_dup  $dst, $con\t# vector (sve) (H)" %}
2133   ins_encode %{
2134     __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant);
2135   %}
2136   ins_pipe(pipe_slow);
2137 %}
2138 
2139 instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{
2140   predicate(UseSVE > 0);
2141   match(Set dst (ReplicateI con));
2142   ins_cost(SVE_COST);
2143   format %{ "sve_dup  $dst, $con\t# vector (sve) (S)" %}
2144   ins_encode %{
2145     __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant);
2146   %}
2147   ins_pipe(pipe_slow);
2148 %}
2149 
2150 instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{
2151   predicate(UseSVE > 0);
2152   match(Set dst (ReplicateL con));
2153   ins_cost(SVE_COST);
2154   format %{ "sve_dup  $dst, $con\t# vector (sve) (D)" %}
2155   ins_encode %{
2156     __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant);
2157   %}
2158   ins_pipe(pipe_slow);
2159 %}
2160 
2161 instruct replicateF(vReg dst, vRegF src) %{
2162   predicate(UseSVE > 0);
2163   match(Set dst (ReplicateF src));
2164   ins_cost(SVE_COST);
2165   format %{ "sve_cpy  $dst, $src\t# vector (sve) (S)" %}
2166   ins_encode %{
2167     __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
2168          ptrue, as_FloatRegister($src$$reg));
2169   %}
2170   ins_pipe(pipe_slow);
2171 %}
2172 
2173 instruct replicateD(vReg dst, vRegD src) %{
2174   predicate(UseSVE > 0);
2175   match(Set dst (ReplicateD src));
2176   ins_cost(SVE_COST);
2177   format %{ "sve_cpy  $dst, $src\t# vector (sve) (D)" %}
2178   ins_encode %{
2179     __ sve_cpy(as_FloatRegister($dst$$reg), __ D,
2180          ptrue, as_FloatRegister($src$$reg));
2181   %}
2182   ins_pipe(pipe_slow);
2183 %}
2184 
2185 // vector shift
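// The register-shift rules are destructive: dst doubles as the first source and the
// shift is applied under ptrue. The immediate-shift rules further down special-case
// out-of-range counts in the encoding: a right shift by 0 degenerates to a register
// copy (sve_orr), arithmetic right shifts clamp the count to element_bits - 1, and
// logical shifts by element_bits or more clear the destination (sve_eor).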
2186 
2187 instruct vasrB(vReg dst, vReg shift) %{
2188   predicate(UseSVE > 0);
2189   match(Set dst (RShiftVB dst shift));
2190   ins_cost(SVE_COST);
2191   format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %}
2192   ins_encode %{
2193     __ sve_asr(as_FloatRegister($dst$$reg), __ B,
2194          ptrue, as_FloatRegister($shift$$reg));
2195   %}
2196   ins_pipe(pipe_slow);
2197 %}
2198 
2199 instruct vasrS(vReg dst, vReg shift) %{
2200   predicate(UseSVE > 0);
2201   match(Set dst (RShiftVS dst shift));
2202   ins_cost(SVE_COST);
2203   format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %}
2204   ins_encode %{
2205     __ sve_asr(as_FloatRegister($dst$$reg), __ H,
2206          ptrue, as_FloatRegister($shift$$reg));
2207   %}
2208   ins_pipe(pipe_slow);
2209 %}
2210 
2211 instruct vasrI(vReg dst, vReg shift) %{
2212   predicate(UseSVE > 0);
2213   match(Set dst (RShiftVI dst shift));
2214   ins_cost(SVE_COST);
2215   format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %}
2216   ins_encode %{
2217     __ sve_asr(as_FloatRegister($dst$$reg), __ S,
2218          ptrue, as_FloatRegister($shift$$reg));
2219   %}
2220   ins_pipe(pipe_slow);
2221 %}
2222 
2223 instruct vasrL(vReg dst, vReg shift) %{
2224   predicate(UseSVE > 0);
2225   match(Set dst (RShiftVL dst shift));
2226   ins_cost(SVE_COST);
2227   format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %}
2228   ins_encode %{
2229     __ sve_asr(as_FloatRegister($dst$$reg), __ D,
2230          ptrue, as_FloatRegister($shift$$reg));
2231   %}
2232   ins_pipe(pipe_slow);
2233 %}
2234 
2235 instruct vlslB(vReg dst, vReg shift) %{
2236   predicate(UseSVE > 0);
2237   match(Set dst (LShiftVB dst shift));
2238   ins_cost(SVE_COST);
2239   format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %}
2240   ins_encode %{
2241     __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
2242          ptrue, as_FloatRegister($shift$$reg));
2243   %}
2244   ins_pipe(pipe_slow);
2245 %}
2246 
2247 instruct vlslS(vReg dst, vReg shift) %{
2248   predicate(UseSVE > 0);
2249   match(Set dst (LShiftVS dst shift));
2250   ins_cost(SVE_COST);
2251   format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %}
2252   ins_encode %{
2253     __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
2254          ptrue, as_FloatRegister($shift$$reg));
2255   %}
2256   ins_pipe(pipe_slow);
2257 %}
2258 
2259 instruct vlslI(vReg dst, vReg shift) %{
2260   predicate(UseSVE > 0);
2261   match(Set dst (LShiftVI dst shift));
2262   ins_cost(SVE_COST);
2263   format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %}
2264   ins_encode %{
2265     __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
2266          ptrue, as_FloatRegister($shift$$reg));
2267   %}
2268   ins_pipe(pipe_slow);
2269 %}
2270 
2271 instruct vlslL(vReg dst, vReg shift) %{
2272   predicate(UseSVE > 0);
2273   match(Set dst (LShiftVL dst shift));
2274   ins_cost(SVE_COST);
2275   format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %}
2276   ins_encode %{
2277     __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
2278          ptrue, as_FloatRegister($shift$$reg));
2279   %}
2280   ins_pipe(pipe_slow);
2281 %}
2282 
2283 instruct vlsrB(vReg dst, vReg shift) %{
2284   predicate(UseSVE > 0);
2285   match(Set dst (URShiftVB dst shift));
2286   ins_cost(SVE_COST);
2287   format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %}
2288   ins_encode %{
2289     __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
2290          ptrue, as_FloatRegister($shift$$reg));
2291   %}
2292   ins_pipe(pipe_slow);
2293 %}
2294 
2295 instruct vlsrS(vReg dst, vReg shift) %{
2296   predicate(UseSVE > 0);
2297   match(Set dst (URShiftVS dst shift));
2298   ins_cost(SVE_COST);
2299   format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %}
2300   ins_encode %{
2301     __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
2302          ptrue, as_FloatRegister($shift$$reg));
2303   %}
2304   ins_pipe(pipe_slow);
2305 %}
2306 
2307 instruct vlsrI(vReg dst, vReg shift) %{
2308   predicate(UseSVE > 0);
2309   match(Set dst (URShiftVI dst shift));
2310   ins_cost(SVE_COST);
2311   format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %}
2312   ins_encode %{
2313     __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
2314          ptrue, as_FloatRegister($shift$$reg));
2315   %}
2316   ins_pipe(pipe_slow);
2317 %}
2318 
2319 instruct vlsrL(vReg dst, vReg shift) %{
2320   predicate(UseSVE > 0);
2321   match(Set dst (URShiftVL dst shift));
2322   ins_cost(SVE_COST);
2323   format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %}
2324   ins_encode %{
2325     __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
2326          ptrue, as_FloatRegister($shift$$reg));
2327   %}
2328   ins_pipe(pipe_slow);
2329 %}
2330 
2331 instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
2332   predicate(UseSVE > 0);
2333   match(Set dst (RShiftVB src (RShiftCntV shift)));
2334   ins_cost(SVE_COST);
2335   format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
2336   ins_encode %{
2337     int con = (int)$shift$$constant;
2338     if (con == 0) {
2339       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2340            as_FloatRegister($src$$reg));
2341       return;
2342     }
2343     if (con >= 8) con = 7;
2344     __ sve_asr(as_FloatRegister($dst$$reg), __ B,
2345          as_FloatRegister($src$$reg), con);
2346   %}
2347   ins_pipe(pipe_slow);
2348 %}
2349 
2350 instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
2351   predicate(UseSVE > 0);
2352   match(Set dst (RShiftVS src (RShiftCntV shift)));
2353   ins_cost(SVE_COST);
2354   format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
2355   ins_encode %{
2356     int con = (int)$shift$$constant;
2357     if (con == 0) {
2358       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2359            as_FloatRegister($src$$reg));
2360       return;
2361     }
2362     if (con >= 16) con = 15;
2363     __ sve_asr(as_FloatRegister($dst$$reg), __ H,
2364          as_FloatRegister($src$$reg), con);
2365   %}
2366   ins_pipe(pipe_slow);
2367 %}
2368 
2369 instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
2370   predicate(UseSVE > 0);
2371   match(Set dst (RShiftVI src (RShiftCntV shift)));
2372   ins_cost(SVE_COST);
2373   format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
2374   ins_encode %{
2375     int con = (int)$shift$$constant;
2376     if (con == 0) {
2377       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2378            as_FloatRegister($src$$reg));
2379       return;
2380     }
2381     __ sve_asr(as_FloatRegister($dst$$reg), __ S,
2382          as_FloatRegister($src$$reg), con);
2383   %}
2384   ins_pipe(pipe_slow);
2385 %}
2386 
2387 instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
2388   predicate(UseSVE > 0);
2389   match(Set dst (RShiftVL src (RShiftCntV shift)));
2390   ins_cost(SVE_COST);
2391   format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
2392   ins_encode %{
2393     int con = (int)$shift$$constant;
2394     if (con == 0) {
2395       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2396            as_FloatRegister($src$$reg));
2397       return;
2398     }
2399     __ sve_asr(as_FloatRegister($dst$$reg), __ D,
2400          as_FloatRegister($src$$reg), con);
2401   %}
2402   ins_pipe(pipe_slow);
2403 %}
2404 
2405 instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
2406   predicate(UseSVE > 0);
2407   match(Set dst (URShiftVB src (RShiftCntV shift)));
2408   ins_cost(SVE_COST);
2409   format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
2410   ins_encode %{
2411     int con = (int)$shift$$constant;
2412     if (con == 0) {
2413       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2414            as_FloatRegister($src$$reg));
2415       return;
2416     }
2417     if (con >= 8) {
2418       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2419            as_FloatRegister($src$$reg));
2420       return;
2421     }
2422     __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
2423          as_FloatRegister($src$$reg), con);
2424   %}
2425   ins_pipe(pipe_slow);
2426 %}
2427 
2428 instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
2429   predicate(UseSVE > 0);
2430   match(Set dst (URShiftVS src (RShiftCntV shift)));
2431   ins_cost(SVE_COST);
2432   format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
2433   ins_encode %{
2434     int con = (int)$shift$$constant;
2435     if (con == 0) {
2436       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2437            as_FloatRegister($src$$reg));
2438       return;
2439     }
2440     if (con >= 16) {
2441       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2442            as_FloatRegister($src$$reg));
2443       return;
2444     }
2445     __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
2446          as_FloatRegister($src$$reg), con);
2447   %}
2448   ins_pipe(pipe_slow);
2449 %}
2450 
2451 instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
2452   predicate(UseSVE > 0);
2453   match(Set dst (URShiftVI src (RShiftCntV shift)));
2454   ins_cost(SVE_COST);
2455   format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
2456   ins_encode %{
2457     int con = (int)$shift$$constant;
2458     if (con == 0) {
2459       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2460            as_FloatRegister($src$$reg));
2461       return;
2462     }
2463     __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
2464          as_FloatRegister($src$$reg), con);
2465   %}
2466   ins_pipe(pipe_slow);
2467 %}
2468 
2469 instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
2470   predicate(UseSVE > 0);
2471   match(Set dst (URShiftVL src (RShiftCntV shift)));
2472   ins_cost(SVE_COST);
2473   format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
2474   ins_encode %{
2475     int con = (int)$shift$$constant;
2476     if (con == 0) {
2477       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2478            as_FloatRegister($src$$reg));
2479       return;
2480     }
2481     __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
2482          as_FloatRegister($src$$reg), con);
2483   %}
2484   ins_pipe(pipe_slow);
2485 %}
2486 
2487 instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
2488   predicate(UseSVE > 0);
2489   match(Set dst (LShiftVB src (LShiftCntV shift)));
2490   ins_cost(SVE_COST);
2491   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
2492   ins_encode %{
2493     int con = (int)$shift$$constant;
2494     if (con >= 8) {
2495       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2496            as_FloatRegister($src$$reg));
2497       return;
2498     }
2499     __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
2500          as_FloatRegister($src$$reg), con);
2501   %}
2502   ins_pipe(pipe_slow);
2503 %}
2504 
2505 instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
2506   predicate(UseSVE > 0);
2507   match(Set dst (LShiftVS src (LShiftCntV shift)));
2508   ins_cost(SVE_COST);
2509   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
2510   ins_encode %{
2511     int con = (int)$shift$$constant;
2512     if (con >= 16) {
2513       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
2514            as_FloatRegister($src$$reg));
2515       return;
2516     }
2517     __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
2518          as_FloatRegister($src$$reg), con);
2519   %}
2520   ins_pipe(pipe_slow);
2521 %}
2522 
2523 instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
2524   predicate(UseSVE > 0);
2525   match(Set dst (LShiftVI src (LShiftCntV shift)));
2526   ins_cost(SVE_COST);
2527   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
2528   ins_encode %{
2529     int con = (int)$shift$$constant;
2530     __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
2531          as_FloatRegister($src$$reg), con);
2532   %}
2533   ins_pipe(pipe_slow);
2534 %}
2535 
2536 instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
2537   predicate(UseSVE > 0);
2538   match(Set dst (LShiftVL src (LShiftCntV shift)));
2539   ins_cost(SVE_COST);
2540   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
2541   ins_encode %{
2542     int con = (int)$shift$$constant;
2543     __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
2544          as_FloatRegister($src$$reg), con);
2545   %}
2546   ins_pipe(pipe_slow);
2547 %}
2548 
2549 instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
2550   predicate(UseSVE > 0 &&
2551             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
2552   match(Set dst (LShiftCntV cnt));
2553   match(Set dst (RShiftCntV cnt));
2554   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
2555   ins_encode %{
2556     __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
2557   %}
2558   ins_pipe(pipe_slow);
2559 %}
2560 
2561 instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
2562   predicate(UseSVE > 0 &&
2563             (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
2564              n->bottom_type()->is_vect()->element_basic_type() == T_CHAR));
2565   match(Set dst (LShiftCntV cnt));
2566   match(Set dst (RShiftCntV cnt));
2567   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
2568   ins_encode %{
2569     __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
2570   %}
2571   ins_pipe(pipe_slow);
2572 %}
2573 
2574 instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
2575   predicate(UseSVE > 0 &&
2576             (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
2577   match(Set dst (LShiftCntV cnt));
2578   match(Set dst (RShiftCntV cnt));
2579   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
2580   ins_encode %{
2581     __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
2582   %}
2583   ins_pipe(pipe_slow);
2584 %}
2585 
2586 instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
2587   predicate(UseSVE > 0 &&
2588             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
2589   match(Set dst (LShiftCntV cnt));
2590   match(Set dst (RShiftCntV cnt));
2591   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
2592   ins_encode %{
2593     __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
2594   %}
2595   ins_pipe(pipe_slow);
2596 %}
2597 
2598 // vector sqrt
2599 
2600 instruct vsqrtF(vReg dst, vReg src) %{
2601   predicate(UseSVE > 0);
2602   match(Set dst (SqrtVF src));
2603   ins_cost(SVE_COST);
2604   format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
2605   ins_encode %{
2606     __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S,
2607          ptrue, as_FloatRegister($src$$reg));
2608   %}
2609   ins_pipe(pipe_slow);
2610 %}
2611 
2612 instruct vsqrtD(vReg dst, vReg src) %{
2613   predicate(UseSVE > 0);
2614   match(Set dst (SqrtVD src));
2615   ins_cost(SVE_COST);
2616   format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
2617   ins_encode %{
2618     __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D,
2619          ptrue, as_FloatRegister($src$$reg));
2620   %}
2621   ins_pipe(pipe_slow);
2622 %}
2623 
2624 // vector sub
2625 
2626 instruct vsubB(vReg dst, vReg src1, vReg src2) %{
2627   predicate(UseSVE > 0);
2628   match(Set dst (SubVB src1 src2));
2629   ins_cost(SVE_COST);
2630   format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %}
2631   ins_encode %{
2632     __ sve_sub(as_FloatRegister($dst$$reg), __ B,
2633          as_FloatRegister($src1$$reg),
2634          as_FloatRegister($src2$$reg));
2635   %}
2636   ins_pipe(pipe_slow);
2637 %}
2638 
2639 instruct vsubS(vReg dst, vReg src1, vReg src2) %{
2640   predicate(UseSVE > 0);
2641   match(Set dst (SubVS src1 src2));
2642   ins_cost(SVE_COST);
2643   format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %}
2644   ins_encode %{
2645     __ sve_sub(as_FloatRegister($dst$$reg), __ H,
2646          as_FloatRegister($src1$$reg),
2647          as_FloatRegister($src2$$reg));
2648   %}
2649   ins_pipe(pipe_slow);
2650 %}
2651 
2652 instruct vsubI(vReg dst, vReg src1, vReg src2) %{
2653   predicate(UseSVE > 0);
2654   match(Set dst (SubVI src1 src2));
2655   ins_cost(SVE_COST);
2656   format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %}
2657   ins_encode %{
2658     __ sve_sub(as_FloatRegister($dst$$reg), __ S,
2659          as_FloatRegister($src1$$reg),
2660          as_FloatRegister($src2$$reg));
2661   %}
2662   ins_pipe(pipe_slow);
2663 %}
2664 
2665 instruct vsubL(vReg dst, vReg src1, vReg src2) %{
2666   predicate(UseSVE > 0);
2667   match(Set dst (SubVL src1 src2));
2668   ins_cost(SVE_COST);
2669   format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %}
2670   ins_encode %{
2671     __ sve_sub(as_FloatRegister($dst$$reg), __ D,
2672          as_FloatRegister($src1$$reg),
2673          as_FloatRegister($src2$$reg));
2674   %}
2675   ins_pipe(pipe_slow);
2676 %}
2677 
2678 instruct vsubF(vReg dst, vReg src1, vReg src2) %{
2679   predicate(UseSVE > 0);
2680   match(Set dst (SubVF src1 src2));
2681   ins_cost(SVE_COST);
2682   format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %}
2683   ins_encode %{
2684     __ sve_fsub(as_FloatRegister($dst$$reg), __ S,
2685          as_FloatRegister($src1$$reg),
2686          as_FloatRegister($src2$$reg));
2687   %}
2688   ins_pipe(pipe_slow);
2689 %}
2690 
2691 instruct vsubD(vReg dst, vReg src1, vReg src2) %{
2692   predicate(UseSVE > 0);
2693   match(Set dst (SubVD src1 src2));
2694   ins_cost(SVE_COST);
2695   format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %}
2696   ins_encode %{
2697     __ sve_fsub(as_FloatRegister($dst$$reg), __ D,
2698          as_FloatRegister($src1$$reg),
2699          as_FloatRegister($src2$$reg));
2700   %}
2701   ins_pipe(pipe_slow);
2702 %}
2703 
2704 // vector mask cast
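// When source and destination masks have the same element count and the same size
// in bytes, the cast is a no-op, so this rule matches in place and emits nothing.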
2705 
2706 instruct vmaskcast(vReg dst) %{
2707   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
2708             n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
2709   match(Set dst (VectorMaskCast dst));
2710   ins_cost(0);
2711   format %{ "vmaskcast $dst\t# empty (sve)" %}
2712   ins_encode %{
2713     // empty
2714   %}
2715   ins_pipe(pipe_class_empty);
2716 %}
2717 
2718 // ------------------------------ Vector cast -------------------------------
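// Widening casts repeatedly sign-extend the low half with sve_sunpklo and, for
// integer-to-floating-point conversions, finish with sve_scvtf. Narrowing casts
// (e.g. S to B below) pack the even-indexed elements with sve_uzp1 against a
// zeroed temporary register.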
2719 
2720 instruct vcvtBtoS(vReg dst, vReg src)
2721 %{
2722   predicate(UseSVE > 0 &&
2723             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2724   match(Set dst (VectorCastB2X src));
2725   ins_cost(SVE_COST);
2726   format %{ "sve_sunpklo  $dst, H, $src\t# convert B to S vector" %}
2727   ins_encode %{
2728     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2729   %}
2730   ins_pipe(pipe_slow);
2731 %}
2732 
2733 instruct vcvtBtoI(vReg dst, vReg src)
2734 %{
2735   predicate(UseSVE > 0 &&
2736             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
2737   match(Set dst (VectorCastB2X src));
2738   ins_cost(2 * SVE_COST);
2739   format %{ "sve_sunpklo  $dst, H, $src\n\t"
2740             "sve_sunpklo  $dst, S, $dst\t# convert B to I vector" %}
2741   ins_encode %{
2742     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2743     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
2744   %}
2745   ins_pipe(pipe_slow);
2746 %}
2747 
2748 instruct vcvtBtoL(vReg dst, vReg src)
2749 %{
2750   predicate(UseSVE > 0 &&
2751             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
2752   match(Set dst (VectorCastB2X src));
2753   ins_cost(3 * SVE_COST);
2754   format %{ "sve_sunpklo  $dst, H, $src\n\t"
2755             "sve_sunpklo  $dst, S, $dst\n\t"
2756             "sve_sunpklo  $dst, D, $dst\t# convert B to L vector" %}
2757   ins_encode %{
2758     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2759     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
2760     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
2761   %}
2762   ins_pipe(pipe_slow);
2763 %}
2764 
2765 instruct vcvtBtoF(vReg dst, vReg src)
2766 %{
2767   predicate(UseSVE > 0 &&
2768             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
2769   match(Set dst (VectorCastB2X src));
2770   ins_cost(3 * SVE_COST);
2771   format %{ "sve_sunpklo  $dst, H, $src\n\t"
2772             "sve_sunpklo  $dst, S, $dst\n\t"
2773             "sve_scvtf  $dst, S, $dst, S\t# convert B to F vector" %}
2774   ins_encode %{
2775     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2776     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
2777     __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
2778   %}
2779   ins_pipe(pipe_slow);
2780 %}
2781 
2782 instruct vcvtBtoD(vReg dst, vReg src)
2783 %{
2784   predicate(UseSVE > 0 &&
2785             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
2786   match(Set dst (VectorCastB2X src));
2787   ins_cost(4 * SVE_COST);
2788   format %{ "sve_sunpklo  $dst, H, $src\n\t"
2789             "sve_sunpklo  $dst, S, $dst\n\t"
2790             "sve_sunpklo  $dst, D, $dst\n\t"
2791             "sve_scvtf  $dst, D, $dst, D\t# convert B to D vector" %}
2792   ins_encode %{
2793     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
2794     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
2795     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
2796     __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
2797   %}
2798   ins_pipe(pipe_slow);
2799 %}
2800 
2801 instruct vcvtStoB(vReg dst, vReg src, vReg tmp)
2802 %{
2803   predicate(UseSVE > 0 &&
2804             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2805   match(Set dst (VectorCastS2X src));
2806   effect(TEMP tmp);
2807   ins_cost(2 * SVE_COST);
2808   format %{ "sve_dup  $tmp, B, 0\n\t"
2809             "sve_uzp1  $dst, B, $src, $tmp\t# convert S to B vector" %}
2810   ins_encode %{
2811     __ sve_dup(as_FloatRegister($tmp$$reg), __ B, 0);
2812     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
2813   %}
2814   ins_pipe(pipe_slow);
2815 %}
2816 
2817 instruct vcvtStoI(vReg dst, vReg src)
2818 %{
2819   predicate(UseSVE > 0 &&
2820             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
2821   match(Set dst (VectorCastS2X src));
2822   ins_cost(SVE_COST);
2823   format %{ "sve_sunpklo  $dst, S, $src\t# convert S to I vector" %}
2824   ins_encode %{
2825     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
2826   %}
2827   ins_pipe(pipe_slow);
2828 %}
2829 
2830 instruct vcvtStoL(vReg dst, vReg src)
2831 %{
2832   predicate(UseSVE > 0 &&
2833             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
2834   match(Set dst (VectorCastS2X src));
2835   ins_cost(2 * SVE_COST);
2836   format %{ "sve_sunpklo  $dst, S, $src\n\t"
2837             "sve_sunpklo  $dst, D, $dst\t# convert S to L vector" %}
2838   ins_encode %{
2839     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
2840     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
2841   %}
2842   ins_pipe(pipe_slow);
2843 %}
2844 
2845 instruct vcvtStoF(vReg dst, vReg src)
2846 %{
2847   predicate(UseSVE > 0 &&
2848             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
2849   match(Set dst (VectorCastS2X src));
2850   ins_cost(2 * SVE_COST);
2851   format %{ "sve_sunpklo  $dst, S, $src\n\t"
2852             "sve_scvtf  $dst, S, $dst, S\t# convert S to F vector" %}
2853   ins_encode %{
2854     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
2855     __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
2856   %}
2857   ins_pipe(pipe_slow);
2858 %}
2859 
2860 instruct vcvtStoD(vReg dst, vReg src)
2861 %{
2862   predicate(UseSVE > 0 &&
2863             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
2864   match(Set dst (VectorCastS2X src));
2865   ins_cost(3 * SVE_COST);
2866   format %{ "sve_sunpklo  $dst, S, $src\n\t"
2867             "sve_sunpklo  $dst, D, $dst\n\t"
2868             "sve_scvtf  $dst, D, $dst, D\t# convert S to D vector" %}
2869   ins_encode %{
2870     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
2871     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
2872     __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
2873   %}
2874   ins_pipe(pipe_slow);
2875 %}
2876 
2877 instruct vcvtItoB(vReg dst, vReg src, vReg tmp)
2878 %{
2879   predicate(UseSVE > 0 &&
2880             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2881   match(Set dst (VectorCastI2X src));
2882   effect(TEMP_DEF dst, TEMP tmp);
2883   ins_cost(3 * SVE_COST);
2884   format %{ "sve_dup  $tmp, H, 0\n\t"
2885             "sve_uzp1  $dst, H, $src, $tmp\n\t"
2886             "sve_uzp1  $dst, B, $dst, $tmp\t# convert I to B vector" %}
2887   ins_encode %{
2888     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
2889     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
2890     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
2891   %}
2892   ins_pipe(pipe_slow);
2893 %}
2894 
2895 instruct vcvtItoS(vReg dst, vReg src, vReg tmp)
2896 %{
2897   predicate(UseSVE > 0 &&
2898             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2899   match(Set dst (VectorCastI2X src));
2900   effect(TEMP tmp);
2901   ins_cost(2 * SVE_COST);
2902   format %{ "sve_dup  $tmp, H, 0\n\t"
2903             "sve_uzp1  $dst, H, $src, $tmp\t# convert I to S vector" %}
2904   ins_encode %{
2905     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
2906     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
2907   %}
2908   ins_pipe(pipe_slow);
2909 %}
2910 
2911 instruct vcvtItoL(vReg dst, vReg src)
2912 %{
2913   predicate(UseSVE > 0 &&
2914             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
2915   match(Set dst (VectorCastI2X src));
2916   ins_cost(SVE_COST);
2917   format %{ "sve_sunpklo  $dst, D, $src\t# convert I to L vector" %}
2918   ins_encode %{
2919     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
2920   %}
2921   ins_pipe(pipe_slow);
2922 %}
2923 
2924 instruct vcvtItoF(vReg dst, vReg src)
2925 %{
2926   predicate(UseSVE > 0 &&
2927             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
2928   match(Set dst (VectorCastI2X src));
2929   ins_cost(SVE_COST);
2930   format %{ "sve_scvtf  $dst, S, $src, S\t# convert I to F vector" %}
2931   ins_encode %{
2932     __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
2933   %}
2934   ins_pipe(pipe_slow);
2935 %}
2936 
2937 instruct vcvtItoD(vReg dst, vReg src)
2938 %{
2939   predicate(UseSVE > 0 &&
2940             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
2941   match(Set dst (VectorCastI2X src));
2942   ins_cost(2 * SVE_COST);
2943   format %{ "sve_sunpklo  $dst, D, $src\n\t"
2944             "sve_scvtf  $dst, D, $dst, D\t# convert I to D vector" %}
2945   ins_encode %{
2946     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
2947     __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
2948   %}
2949   ins_pipe(pipe_slow);
2950 %}
2951 
2952 instruct vcvtLtoB(vReg dst, vReg src, vReg tmp)
2953 %{
2954   predicate(UseSVE > 0 &&
2955             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2956   match(Set dst (VectorCastL2X src));
2957   effect(TEMP_DEF dst, TEMP tmp);
2958   ins_cost(4 * SVE_COST);
2959   format %{ "sve_dup  $tmp, S, 0\n\t"
2960             "sve_uzp1  $dst, S, $src, $tmp\n\t"
2961             "sve_uzp1  $dst, H, $dst, $tmp\n\t"
2962             "sve_uzp1  $dst, B, $dst, $tmp\t# convert L to B vector" %}
2963   ins_encode %{
2964     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
2965     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
2966     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
2967     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
2968   %}
2969   ins_pipe(pipe_slow);
2970 %}
2971 
2972 instruct vcvtLtoS(vReg dst, vReg src, vReg tmp)
2973 %{
2974   predicate(UseSVE > 0 &&
2975             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2976   match(Set dst (VectorCastL2X src));
2977   effect(TEMP_DEF dst, TEMP tmp);
2978   ins_cost(3 * SVE_COST);
2979   format %{ "sve_dup  $tmp, S, 0\n\t"
2980             "sve_uzp1  $dst, S, $src, $tmp\n\t"
2981             "sve_uzp1  $dst, H, $dst, $tmp\t# convert L to S vector" %}
2982   ins_encode %{
2983     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
2984     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
2985     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
2986   %}
2987   ins_pipe(pipe_slow);
2988 %}
2989 
2990 instruct vcvtLtoI(vReg dst, vReg src, vReg tmp)
2991 %{
2992   predicate(UseSVE > 0 &&
2993             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
2994   match(Set dst (VectorCastL2X src));
2995   effect(TEMP tmp);
2996   ins_cost(2 * SVE_COST);
2997   format %{ "sve_dup  $tmp, S, 0\n\t"
2998             "sve_uzp1  $dst, S, $src, $tmp\t# convert L to I vector" %}
2999   ins_encode %{
3000     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
3001     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
3002   %}
3003   ins_pipe(pipe_slow);
3004 %}
3005 
3006 instruct vcvtLtoF(vReg dst, vReg src, vReg tmp)
3007 %{
3008   predicate(UseSVE > 0 &&
3009             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
3010   match(Set dst (VectorCastL2X src));
3011   effect(TEMP_DEF dst, TEMP tmp);
3012   ins_cost(3 * SVE_COST);
3013   format %{ "sve_scvtf  $dst, S, $src, D\n\t"
3014             "sve_dup  $tmp, S, 0\n\t"
3015             "sve_uzp1  $dst, S, $dst, $tmp\t# convert L to F vector" %}
3016   ins_encode %{
3017     __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
3018     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
3019     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3020   %}
3021   ins_pipe(pipe_slow);
3022 %}
3023 
3024 instruct vcvtLtoD(vReg dst, vReg src)
3025 %{
3026   predicate(UseSVE > 0 &&
3027             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
3028   match(Set dst (VectorCastL2X src));
3029   ins_cost(SVE_COST);
3030   format %{ "sve_scvtf  $dst, D, $src, D\t# convert L to D vector" %}
3031   ins_encode %{
3032     __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
3033   %}
3034   ins_pipe(pipe_slow);
3035 %}
3036 
3037 instruct vcvtFtoB(vReg dst, vReg src, vReg tmp)
3038 %{
3039   predicate(UseSVE > 0 &&
3040             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
3041   match(Set dst (VectorCastF2X src));
3042   effect(TEMP_DEF dst, TEMP tmp);
3043   ins_cost(4 * SVE_COST);
3044   format %{ "sve_fcvtzs  $dst, S, $src, S\n\t"
3045             "sve_dup  $tmp, H, 0\n\t"
3046             "sve_uzp1  $dst, H, $dst, $tmp\n\t"
3047             "sve_uzp1  $dst, B, $dst, $tmp\t# convert F to B vector" %}
3048   ins_encode %{
3049     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
3050     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
3051     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3052     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3053   %}
3054   ins_pipe(pipe_slow);
3055 %}
3056 
3057 instruct vcvtFtoS(vReg dst, vReg src, vReg tmp)
3058 %{
3059   predicate(UseSVE > 0 &&
3060             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
3061   match(Set dst (VectorCastF2X src));
3062   effect(TEMP_DEF dst, TEMP tmp);
3063   ins_cost(3 * SVE_COST);
3064   format %{ "sve_fcvtzs  $dst, S, $src, S\n\t"
3065             "sve_dup  $tmp, H, 0\n\t"
3066             "sve_uzp1  $dst, H, $dst, $tmp\t# convert F to S vector" %}
3067   ins_encode %{
3068     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
3069     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
3070     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3071   %}
3072   ins_pipe(pipe_slow);
3073 %}
3074 
3075 instruct vcvtFtoI(vReg dst, vReg src)
3076 %{
3077   predicate(UseSVE > 0 &&
3078             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
3079   match(Set dst (VectorCastF2X src));
3080   ins_cost(SVE_COST);
3081   format %{ "sve_fcvtzs  $dst, S, $src, S\t# convert F to I vector" %}
3082   ins_encode %{
3083     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
3084   %}
3085   ins_pipe(pipe_slow);
3086 %}
3087 
3088 instruct vcvtFtoL(vReg dst, vReg src)
3089 %{
3090   predicate(UseSVE > 0 &&
3091             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
3092   match(Set dst (VectorCastF2X src));
3093   ins_cost(2 * SVE_COST);
3094   format %{ "sve_fcvtzs  $dst, S, $src, S\n\t"
3095             "sve_sunpklo  $dst, D, $dst\t# convert F to L vector" %}
3096   ins_encode %{
3097     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
3098     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
3099   %}
3100   ins_pipe(pipe_slow);
3101 %}
3102 
3103 instruct vcvtFtoD(vReg dst, vReg src)
3104 %{
3105   predicate(UseSVE > 0 &&
3106             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
3107   match(Set dst (VectorCastF2X src));
3108   ins_cost(2 * SVE_COST);
3109   format %{ "sve_sunpklo  $dst, D, $src\n\t"
3110             "sve_fcvt  $dst, D, $dst, S\t# convert F to D vector" %}
3111   ins_encode %{
3112     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
3113     __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
3114   %}
3115   ins_pipe(pipe_slow);
3116 %}
3117 
3118 instruct vcvtDtoB(vReg dst, vReg src, vReg tmp)
3119 %{
3120   predicate(UseSVE > 0 &&
3121             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
3122   match(Set dst (VectorCastD2X src));
3123   effect(TEMP_DEF dst, TEMP tmp);
3124   ins_cost(5 * SVE_COST);
3125   format %{ "sve_fcvtzs  $dst, D, $src, D\n\t"
3126             "sve_dup  $tmp, S, 0\n\t"
3127             "sve_uzp1  $dst, S, $dst, $tmp\n\t"
3128             "sve_uzp1  $dst, H, $dst, $tmp\n\t"
3129             "sve_uzp1  $dst, B, $dst, $tmp\t# convert D to B vector" %}
3130   ins_encode %{
3131     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
3132     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
3133     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3134     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3135     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3136   %}
3137   ins_pipe(pipe_slow);
3138 %}
3139 
3140 instruct vcvtDtoS(vReg dst, vReg src, vReg tmp)
3141 %{
3142   predicate(UseSVE > 0 &&
3143             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
3144   match(Set dst (VectorCastD2X src));
3145   effect(TEMP_DEF dst, TEMP tmp);
3146   ins_cost(4 * SVE_COST);
3147   format %{ "sve_fcvtzs  $dst, D, $src, D\n\t"
3148             "sve_dup  $tmp, S, 0\n\t"
3149             "sve_uzp1  $dst, S, $dst, $tmp\n\t"
3150             "sve_uzp1  $dst, H, $dst, $tmp\t# convert D to S vector" %}
3151   ins_encode %{
3152     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
3153     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
3154     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3155     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3156   %}
3157   ins_pipe(pipe_slow);
3158 %}
3159 
3160 instruct vcvtDtoI(vReg dst, vReg src, vReg tmp)
3161 %{
3162   predicate(UseSVE > 0 &&
3163             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
3164   match(Set dst (VectorCastD2X src));
3165   effect(TEMP_DEF dst, TEMP tmp);
3166   ins_cost(3 * SVE_COST);
3167   format %{ "sve_fcvtzs  $dst, D, $src, D\n\t"
3168             "sve_dup  $tmp, S, 0\n\t"
3169             "sve_uzp1  $dst, S, $dst, $tmp\t# convert D to I vector" %}
3170   ins_encode %{
3171     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
3172     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
3173     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3174   %}
3175   ins_pipe(pipe_slow);
3176 %}
3177 
3178 instruct vcvtDtoL(vReg dst, vReg src)
3179 %{
3180   predicate(UseSVE > 0 &&
3181             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
3182   match(Set dst (VectorCastD2X src));
3183   ins_cost(SVE_COST);
3184   format %{ "sve_fcvtzs  $dst, D, $src, D\t# convert D to L vector" %}
3185   ins_encode %{
3186     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
3187   %}
3188   ins_pipe(pipe_slow);
3189 %}
3190 
3191 instruct vcvtDtoF(vReg dst, vReg src, vReg tmp)
3192 %{
3193   predicate(UseSVE > 0 &&
3194             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
3195   match(Set dst (VectorCastD2X src));
3196   effect(TEMP_DEF dst, TEMP tmp);
3197   ins_cost(3 * SVE_COST);
3198   format %{ "sve_fcvt  $dst, S, $src, D\n\t"
3199             "sve_dup  $tmp, S, 0\n\t"
3200             "sve_uzp1  $dst, S, $dst, $tmp\t# convert D to F vector" %}
3201   ins_encode %{
3202     __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
3203     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
3204     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
3205   %}
3206   ins_pipe(pipe_slow);
3207 %}
3208 // ------------------------------ Vector extract ---------------------------------
3209 
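// Extraction builds a one-lane governing predicate for the constant index and uses it
// to move that lane into a scalar or FP register (sve_extract). Sub-word results are
// then sign-extended to int with sbfmw (bit range 0..7 for B, 0..15 for S).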
3210 instruct extractB(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
3211 %{
3212   predicate(UseSVE > 0);
3213   match(Set dst (ExtractB src idx));
3214   effect(TEMP pTmp, KILL cr);
3215   ins_cost(2 * SVE_COST);
3216   format %{ "sve_extract $dst, B, $pTmp, $src, $idx\n\t"
3217             "sbfmw $dst, $dst, 0U, 7U\t# extract from vector(B)" %}
3218   ins_encode %{
3219     __ sve_extract(as_Register($dst$$reg), __ B, as_PRegister($pTmp$$reg),
3220                    as_FloatRegister($src$$reg), (int)($idx$$constant));
3221     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 7U);
3222   %}
3223   ins_pipe(pipe_slow);
3224 %}
3225 
3226 instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
3227 %{
3228   predicate(UseSVE > 0);
3229   match(Set dst (ExtractS src idx));
3230   effect(TEMP pTmp, KILL cr);
3231   ins_cost(2 * SVE_COST);
3232   format %{ "sve_extract $dst, H, $pTmp, $src, $idx\n\t"
3233             "sbfmw $dst, $dst, 0U, 15U\t# extract from vector(S)" %}
3234   ins_encode %{
3235     __ sve_extract(as_Register($dst$$reg), __ H, as_PRegister($pTmp$$reg),
3236                    as_FloatRegister($src$$reg), (int)($idx$$constant));
3237     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
3238   %}
3239   ins_pipe(pipe_slow);
3240 %}
3241 
3243 instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
3244 %{
3245   predicate(UseSVE > 0);
3246   match(Set dst (ExtractI src idx));
3247   effect(TEMP pTmp, KILL cr);
3248   ins_cost(2 * SVE_COST);
3249   format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(I)" %}
3250   ins_encode %{
3251     __ sve_extract(as_Register($dst$$reg), __ S, as_PRegister($pTmp$$reg),
3252                    as_FloatRegister($src$$reg), (int)($idx$$constant));
3253   %}
3254   ins_pipe(pipe_slow);
3255 %}
3256 
3257 instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
3258 %{
3259   predicate(UseSVE > 0);
3260   match(Set dst (ExtractL src idx));
3261   effect(TEMP pTmp, KILL cr);
3262   ins_cost(2 * SVE_COST);
3263   format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(L)" %}
3264   ins_encode %{
3265     __ sve_extract(as_Register($dst$$reg), __ D, as_PRegister($pTmp$$reg),
3266                    as_FloatRegister($src$$reg), (int)($idx$$constant));
3267   %}
3268   ins_pipe(pipe_slow);
3269 %}
3270 
3271 instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
3272 %{
3273   predicate(UseSVE > 0);
3274   match(Set dst (ExtractF src idx));
3275   effect(TEMP pTmp, KILL cr);
3276   ins_cost(2 * SVE_COST);
3277   format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(F)" %}
3278   ins_encode %{
3279     __ sve_extract(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg),
3280                    as_FloatRegister($src$$reg), (int)($idx$$constant));
3281   %}
3282   ins_pipe(pipe_slow);
3283 %}
3284 
3285 instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
3286 %{
3287   predicate(UseSVE > 0);
3288   match(Set dst (ExtractD src idx));
3289   effect(TEMP pTmp, KILL cr);
3290   ins_cost(2 * SVE_COST);
3291   format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(D)" %}
3292   ins_encode %{
3293     __ sve_extract(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg),
3294                    as_FloatRegister($src$$reg), (int)($idx$$constant));
3295   %}
3296   ins_pipe(pipe_slow);
3297 %}
3298 
3299 // ------------------------------- VectorTest ----------------------------------
3300 
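// VectorTest reduces a vector mask (lanes hold -1 for true, 0 for false) to a single
// boolean. "alltrue" compares the mask against 0 and succeeds when no lane matches
// (condition EQ after the SVE compare); "anytrue" compares against -1 and succeeds
// when at least one lane matches (condition NE). csetw then turns the flags into 0/1.
// The *_partial rules first build a governing predicate with sve_whilelo so that only
// the node's actual vector length is tested when it is shorter than MaxVectorSize.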
3301 instruct vtest_alltrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr)
3302 %{
3303   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
3304             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
3305   match(Set dst (VectorTest src1 src2));
3306   effect(TEMP pTmp, KILL cr);
3307   ins_cost(SVE_COST);
3308   format %{ "sve_cmpeq $pTmp, $src1, 0\n\t"
3309             "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %}
3310   ins_encode %{
3311     // "src2" is not used for sve.
3312     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
3313     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
3314     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
3315                ptrue, as_FloatRegister($src1$$reg), 0);
3316     __ csetw(as_Register($dst$$reg), Assembler::EQ);
3317   %}
3318   ins_pipe(pipe_slow);
3319 %}
3320 
3321 instruct vtest_anytrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr)
3322 %{
3323   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
3324             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
3325   match(Set dst (VectorTest src1 src2));
3326   effect(TEMP pTmp, KILL cr);
3327   ins_cost(SVE_COST);
3328   format %{ "sve_cmpeq $pTmp, $src1, -1\n\t"
3329             "csetw $dst, NE\t# VectorTest (sve) - anytrue" %}
3330   ins_encode %{
3331     // "src2" is not used for sve.
3332     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
3333     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
3334     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
3335                ptrue, as_FloatRegister($src1$$reg), -1);
3336     __ csetw(as_Register($dst$$reg), Assembler::NE);
3337   %}
3338   ins_pipe(pipe_slow);
3339 %}
3340 
3341 instruct vtest_alltrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr)
3342 %{
3343   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
3344             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
3345   match(Set dst (VectorTest src1 src2));
3346   effect(TEMP pTmp, KILL cr);
3347   ins_cost(SVE_COST);
3348   format %{ "vtest_alltrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - alltrue" %}
3349   ins_encode %{
3350     // "src2" is not used for sve.
3351     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
3352     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
3353     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size,
3354                           Matcher::vector_length(this, $src1));
3355     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
3356                as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), 0);
3357     __ csetw(as_Register($dst$$reg), Assembler::EQ);
3358   %}
3359   ins_pipe(pipe_slow);
3360 %}
3361 
3362 instruct vtest_anytrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr)
3363 %{
3364   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
3365             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
3366   match(Set dst (VectorTest src1 src2));
3367   effect(TEMP pTmp, KILL cr);
3368   ins_cost(SVE_COST);
3369   format %{ "vtest_anytrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - anytrue" %}
3370   ins_encode %{
3371     // "src2" is not used for sve.
3372     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
3373     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
3374     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size,
3375                           Matcher::vector_length(this, $src1));
3376     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
3377                as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), -1);
3378     __ csetw(as_Register($dst$$reg), Assembler::NE);
3379   %}
3380   ins_pipe(pipe_slow);
3381 %}
3382 
3383 // ------------------------------ Vector insert ---------------------------------
3384 
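// There is no single SVE instruction for lane insertion, so these rules build a
// predicate selecting exactly the target lane: sve_index fills a register with lane
// numbers, sve_cmpeq compares them against the constant index, $src is copied to
// $dst (sve_orr), and sve_cpy writes the scalar into the one active lane. The
// *_small variants (at most 32 lanes) start the index sequence at -16 so the biased
// lane number fits the compare's signed immediate range [-16, 15]; larger vectors
// broadcast the index with sve_dup and compare vector against vector instead.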
3385 instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pTmp, rFlagsReg cr)
3386 %{
3387   predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
3388             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
3389              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
3390              n->bottom_type()->is_vect()->element_basic_type() == T_INT));
3391   match(Set dst (VectorInsert (Binary src val) idx));
3392   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
3393   ins_cost(4 * SVE_COST);
3394   format %{ "sve_index $dst, -16, 1\t# (B/S/I)\n\t"
3395             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
3396             "sve_orr $dst, $src, $src\n\t"
3397             "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
3398   ins_encode %{
3399     BasicType bt = Matcher::vector_element_basic_type(this, $src);
3400     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
3401     __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1);
3402     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
3403                as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
3404     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
3405     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
3406   %}
3407   ins_pipe(pipe_slow);
3408 %}
3409 
3410 instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pTmp, rFlagsReg cr)
3411 %{
3412   predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
3413             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
3414   match(Set dst (VectorInsert (Binary src val) idx));
3415   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
3416   ins_cost(4 * SVE_COST);
3417   format %{ "sve_index $dst, S, -16, 1\n\t"
3418             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
3419             "sve_orr $dst, $src, $src\n\t"
3420             "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
3421   ins_encode %{
3422     __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1);
3423     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
3424                as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
3425     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
3426     __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
3427   %}
3428   ins_pipe(pipe_slow);
3429 %}
3430 
3431 instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
3432 %{
3433   predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
3434             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
3435              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
3436              n->bottom_type()->is_vect()->element_basic_type() == T_INT));
3437   match(Set dst (VectorInsert (Binary src val) idx));
3438   effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
3439   ins_cost(5 * SVE_COST);
3440   format %{ "sve_index $tmp1, 0, 1\t# (B/S/I)\n\t"
3441             "sve_dup $dst, $idx\t# (B/S/I)\n\t"
3442             "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
3443             "sve_orr $dst, $src, $src\n\t"
3444             "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
3445   ins_encode %{
3446     BasicType bt = Matcher::vector_element_basic_type(this, $src);
3447     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
3448     __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1);
3449     __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant));
3450     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
3451                as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
3452     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
3453     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
3454   %}
3455   ins_pipe(pipe_slow);
3456 %}
3457 
3458 instruct insertL(vReg dst, vReg src, iRegL val, immI idx, pRegGov pTmp, rFlagsReg cr)
3459 %{
3460   predicate(UseSVE > 0 &&
3461             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
3462   match(Set dst (VectorInsert (Binary src val) idx));
3463   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
3464   ins_cost(4 * SVE_COST);
3465   format %{ "sve_index $dst, D, -16, 1\n\t"
3466             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
3467             "sve_orr $dst, $src, $src\n\t"
3468             "sve_cpy $dst, $pTmp, $val\t# insert into vector (L)" %}
3469   ins_encode %{
3470     __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1);
3471     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue,
3472                as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
3473     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
3474     __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_Register($val$$reg));
3475   %}
3476   ins_pipe(pipe_slow);
3477 %}
3478 
3479 instruct insertD(vReg dst, vReg src, vRegD val, immI idx, pRegGov pTmp, rFlagsReg cr)
3480 %{
3481   predicate(UseSVE > 0 &&
3482             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
3483   match(Set dst (VectorInsert (Binary src val) idx));
3484   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
3485   ins_cost(4 * SVE_COST);
3486   format %{ "sve_index $dst, D, -16, 1\n\t"
3487             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
3488             "sve_orr $dst, $src, $src\n\t"
3489             "sve_cpy $dst, $pTmp, $val\t# insert into vector (D)" %}
3490   ins_encode %{
3491     __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1);
3492     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue,
3493                as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
3494     __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
3495     __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
3496   %}
3497   ins_pipe(pipe_slow);
3498 %}
3499 
3500 instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
3501 %{
3502   predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
3503             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
3504   match(Set dst (VectorInsert (Binary src val) idx));
3505   effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
3506   ins_cost(5 * SVE_COST);
3507   format %{ "sve_index $tmp1, S, 0, 1\n\t"
3508             "sve_dup $dst, S, $idx\n\t"
3509             "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
3510             "sve_orr $dst, $src, $src\n\t"
3511             "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
3512   ins_encode %{
3513     __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1);
3514     __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant));
3515     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
3516                as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
3517     __ sve_orr(as_FloatRegister($dst$$reg),
3518                as_FloatRegister($src$$reg),
3519                as_FloatRegister($src$$reg));
3520     __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
3521                as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
3522   %}
3523   ins_pipe(pipe_slow);
3524 %}
3525 
3526 // ------------------------------ Vector shuffle -------------------------------
3527 
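// VectorLoadShuffle receives the shuffle indices as a byte vector. For wider element
// types the indices are zero-extended to the element size with sve_uunpklo so that
// sve_tbl (see the rearrange rule below) can consume them directly; for byte elements
// a plain register copy is all that is needed.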
3528 instruct loadshuffleB(vReg dst, vReg src)
3529 %{
3530   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
3531   match(Set dst (VectorLoadShuffle src));
3532   ins_cost(SVE_COST);
3533   format %{ "sve_orr $dst, $src, $src\t# vector load shuffle (B)" %}
3534   ins_encode %{
3535     if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
3536       __ sve_orr(as_FloatRegister($dst$$reg),
3537                  as_FloatRegister($src$$reg),
3538                  as_FloatRegister($src$$reg));
3539     }
3540   %}
3541   ins_pipe(pipe_slow);
3542 %}
3543 
3544 instruct loadshuffleS(vReg dst, vReg src)
3545 %{
3546   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
3547   match(Set dst (VectorLoadShuffle src));
3548   ins_cost(SVE_COST);
3549   format %{ "sve_uunpklo $dst, $src\t# vector load shuffle (B to H)" %}
3550   ins_encode %{
3551     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
3552   %}
3553   ins_pipe(pipe_slow);
3554 %}
3555 
3556 instruct loadshuffleI(vReg dst, vReg src)
3557 %{
3558   predicate(UseSVE > 0 &&
3559            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
3560             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
3561   match(Set dst (VectorLoadShuffle src));
3562   ins_cost(2 * SVE_COST);
3563   format %{ "sve_uunpklo $dst, H, $src\n\t"
3564             "sve_uunpklo $dst, S, $dst\t# vector load shuffle (B to S)" %}
3565   ins_encode %{
3566     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
3567     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
3568   %}
3569   ins_pipe(pipe_slow);
3570 %}
3571 
3572 instruct loadshuffleL(vReg dst, vReg src)
3573 %{
3574   predicate(UseSVE > 0 &&
3575            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
3576             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
3577   match(Set dst (VectorLoadShuffle src));
3578   ins_cost(3 * SVE_COST);
3579   format %{ "sve_uunpklo $dst, H, $src\n\t"
3580             "sve_uunpklo $dst, S, $dst\n\t"
3581             "sve_uunpklo $dst, D, $dst\t# vector load shuffle (B to D)" %}
3582   ins_encode %{
3583     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
3584     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
3585     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
3586   %}
3587   ins_pipe(pipe_slow);
3588 %}
3589 
3590 // ------------------------------ Vector rearrange -------------------------------
3591 
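// sve_tbl performs the permutation: each lane of $dst is taken from the lane of $src
// selected by the corresponding lane of $shuffle.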
3592 instruct rearrange(vReg dst, vReg src, vReg shuffle)
3593 %{
3594   predicate(UseSVE > 0);
3595   match(Set dst (VectorRearrange src shuffle));
3596   ins_cost(SVE_COST);
3597   format %{ "sve_tbl $dst, $src, $shuffle\t# vector rearrange" %}
3598   ins_encode %{
3599     BasicType bt = Matcher::vector_element_basic_type(this, $src);
3600     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
3601     __ sve_tbl(as_FloatRegister($dst$$reg), size,
3602                as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
3603   %}
3604   ins_pipe(pipe_slow);
3605 %}
3606 
3607 // ------------------------------ Vector Load Gather ---------------------------------
3608 
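// Gather loads read one element per lane from the base address in $mem plus an offset
// taken from the matching lane of $idx. ld1w covers int/float elements; for
// long/double the indices arrive as ints and are widened to 64 bits with sve_uunpklo
// before the ld1d gather.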
3609 instruct gatherI(vReg dst, indirect mem, vReg idx) %{
3610   predicate(UseSVE > 0 &&
3611             n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
3612             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
3613              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
3614   match(Set dst (LoadVectorGather mem idx));
3615   ins_cost(SVE_COST);
3616   format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %}
3617   ins_encode %{
3618     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue,
3619                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
3620   %}
3621   ins_pipe(pipe_slow);
3622 %}
3623 
3624 instruct gatherL(vReg dst, indirect mem, vReg idx) %{
3625   predicate(UseSVE > 0 &&
3626             n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
3627             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
3628              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
3629   match(Set dst (LoadVectorGather mem idx));
3630   ins_cost(2 * SVE_COST);
3631   format %{ "sve_uunpklo $idx, $idx\n\t"
3632             "load_vector_gather $dst, $mem, $idx\t# vector load gather (L/D)" %}
3633   ins_encode %{
3634     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
3635     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg));
3636   %}
3637   ins_pipe(pipe_slow);
3638 %}
3639 
3640 // ------------------------------ Vector Load Gather Partial ------------------------------
3641 
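// When the vector is shorter than MaxVectorSize, sve_whilelo builds a governing
// predicate covering only the first vector_length lanes, so no memory beyond the
// node's actual vector length is accessed.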
3642 instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
3643   predicate(UseSVE > 0 &&
3644             n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
3645             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
3646              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
3647   match(Set dst (LoadVectorGather mem idx));
3648   effect(TEMP pTmp, KILL cr);
3649   ins_cost(2 * SVE_COST + INSN_COST);
3650   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
3651             "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %}
3652   ins_encode %{
3653     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S,
3654                           Matcher::vector_length(this));
3655     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg),
3656                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
3657   %}
3658   ins_pipe(pipe_slow);
3659 %}
3660 
3661 instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
3662   predicate(UseSVE > 0 &&
3663             n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
3664             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
3665              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
3666   match(Set dst (LoadVectorGather mem idx));
3667   effect(TEMP pTmp, KILL cr);
3668   ins_cost(3 * SVE_COST + INSN_COST);
3669   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
3670             "sve_uunpklo $idx, $idx\n\t"
3671             "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (L/D)" %}
3672   ins_encode %{
3673     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D,
3674                           Matcher::vector_length(this));
3675     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
3676     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg),
3677                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
3678   %}
3679   ins_pipe(pipe_slow);
3680 %}
3681 
3682 // ------------------------------ Vector Store Scatter -------------------------------
3683 
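// Scatter stores mirror the gathers above: st1w/st1d write one element per lane to
// the base address in $mem plus an offset taken from the matching lane of $idx, with
// long/double indices widened by sve_uunpklo first.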
3684 instruct scatterI(indirect mem, vReg src, vReg idx) %{
3685   predicate(UseSVE > 0 &&
3686             n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
3687             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
3688              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
3689   match(Set mem (StoreVectorScatter mem (Binary src idx)));
3690   ins_cost(SVE_COST);
3691   format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %}
3692   ins_encode %{
3693     __ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue,
3694                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
3695   %}
3696   ins_pipe(pipe_slow);
3697 %}
3698 
3699 instruct scatterL(indirect mem, vReg src, vReg idx) %{
3700   predicate(UseSVE > 0 &&
3701             n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
3702             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
3703              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
3704   match(Set mem (StoreVectorScatter mem (Binary src idx)));
3705   ins_cost(2 * SVE_COST);
3706   format %{ "sve_uunpklo $idx, $idx\n\t"
3707             "store_vector_scatter $mem, $idx, $src\t# vector store scatter (L/D)" %}
3708   ins_encode %{
3709     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D,
3710                    as_FloatRegister($idx$$reg));
3711     __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue,
3712                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
3713   %}
3714   ins_pipe(pipe_slow);
3715 %}
3716 
3717 // ------------------------------ Vector Store Scatter Partial ------------------------------
3718 
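// As with the partial gathers, sve_whilelo restricts the store to the node's actual
// vector length.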
3719 instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
3720   predicate(UseSVE > 0 &&
3721             n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
3722             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
3723              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
3724   match(Set mem (StoreVectorScatter mem (Binary src idx)));
3725   effect(TEMP pTmp, KILL cr);
3726   ins_cost(2 * SVE_COST + INSN_COST);
3727   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
3728             "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %}
3729   ins_encode %{
3730     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S,
3731                           Matcher::vector_length(this, $src));
3732     __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
3733                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
3734   %}
3735   ins_pipe(pipe_slow);
3736 %}
3737 
3738 instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
3739   predicate(UseSVE > 0 &&
3740             n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
3741             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
3742              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
3743   match(Set mem (StoreVectorScatter mem (Binary src idx)));
3744   effect(TEMP pTmp, KILL cr);
3745   ins_cost(3 * SVE_COST + INSN_COST);
3746   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
3747             "sve_uunpklo $idx, $idx\n\t"
3748             "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (L/D)" %}
3749   ins_encode %{
3750     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D,
3751                           Matcher::vector_length(this, $src));
3752     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
3753     __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
3754                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
3755   %}
3756   ins_pipe(pipe_slow);
3757 %}
3758 
3760 // ------------------------------ Vector Load Const -------------------------------
3761 
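// The only constant matched here is the byte iota sequence (the immediate source is
// zero), which sve_index generates directly with start 0 and step 1.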
3762 instruct loadconB(vReg dst, immI0 src) %{
3763   predicate(UseSVE > 0 &&
3764             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
3765   match(Set dst (VectorLoadConst src));
3766   ins_cost(SVE_COST);
3767   format %{ "sve_index $dst, 0, 1\t# generate iota indices" %}
3768   ins_encode %{
3769     __ sve_index(as_FloatRegister($dst$$reg), __ B, 0, 1);
3770   %}
3771   ins_pipe(pipe_slow);
3772 %}
3773 
3774 // Intrinsics for String.indexOf(char)
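// These rules match StrIndexOfChar for the Latin-1 (L) and UTF-16 (U) layouts and
// delegate to string_indexof_char_sve in the macro assembler, which scans the string
// a vector at a time and returns the index of the first occurrence, or -1 if the
// character is not found.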
3775 
3777 instruct stringL_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
3778                                   iRegI_R0 result, vReg ztmp1, vReg ztmp2,
3779                                   pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
3780 %{
3781   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
3782   predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
3783   effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
3784 
3785   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
3786 
3787   ins_encode %{
3788     __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
3789                                as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
3790                                as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), true /* isL */);
3791   %}
3792   ins_pipe(pipe_class_memory);
3793 %}
3794 
3795 instruct stringU_indexof_char_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
3796                                   iRegI_R0 result, vReg ztmp1, vReg ztmp2,
3797                                   pRegGov pgtmp, pReg ptmp, rFlagsReg cr)
3798 %{
3799   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
3800   predicate((UseSVE > 0) && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
3801   effect(TEMP ztmp1, TEMP ztmp2, TEMP pgtmp, TEMP ptmp, KILL cr);
3802 
3803   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result # use sve" %}
3804 
3805   ins_encode %{
3806     __ string_indexof_char_sve($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
3807                                as_FloatRegister($ztmp1$$reg), as_FloatRegister($ztmp2$$reg),
3808                                as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), false /* isL */);
3809   %}
3810   ins_pipe(pipe_class_memory);
3811 %}
3812 
3813 // ---------------------------- Vector mask reductions ---------------------------
3814 
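// truecount, firsttrue and lasttrue all funnel into sve_vmask_reduction in the macro
// assembler; the rules differ only in cost and, for the *_partial variants, in the
// extra sve_whilelo that limits the reduction to the node's actual vector length.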
3815 instruct vmask_truecount(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
3816   predicate(UseSVE > 0 &&
3817             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
3818   match(Set dst (VectorMaskTrueCount src));
3819   effect(TEMP ptmp, KILL cr);
3820   ins_cost(2 * SVE_COST);
3821   format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %}
3822   ins_encode %{
3823     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
3824                            as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
3825   %}
3826   ins_pipe(pipe_slow);
3827 %}
3828 
3829 instruct vmask_firsttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
3830   predicate(UseSVE > 0 &&
3831             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
3832   match(Set dst (VectorMaskFirstTrue src));
3833   effect(TEMP ptmp, KILL cr);
3834   ins_cost(3 * SVE_COST);
3835   format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %}
3836   ins_encode %{
3837     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
3838                            as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
3839   %}
3840   ins_pipe(pipe_slow);
3841 %}
3842 
3843 instruct vmask_lasttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
3844   predicate(UseSVE > 0 &&
3845             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
3846   match(Set dst (VectorMaskLastTrue src));
3847   effect(TEMP ptmp, KILL cr);
3848   ins_cost(4 * SVE_COST);
3849   format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %}
3850   ins_encode %{
3851     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
3852                            as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
3853   %}
3854   ins_pipe(pipe_slow);
3855 %}
3856 
3857 instruct vmask_truecount_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{
3858   predicate(UseSVE > 0 &&
3859             n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
3860   match(Set dst (VectorMaskTrueCount src));
3861   effect(TEMP ptmp, KILL cr);
3862   ins_cost(3 * SVE_COST);
3863   format %{ "vmask_truecount $dst, $src\t# vector mask truecount partial (sve)" %}
3864   ins_encode %{
3865     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B,
3866                           Matcher::vector_length(this, $src));
3867     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
3868                            as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg));
3869   %}
3870   ins_pipe(pipe_slow);
3871 %}
3872 
3873 instruct vmask_firsttrue_partial(iRegINoSp dst, vReg src, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{
3874   predicate(UseSVE > 0 &&
3875             n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
3876   match(Set dst (VectorMaskFirstTrue src));
3877   effect(TEMP pgtmp, TEMP ptmp, KILL cr);
3878   ins_cost(4 * SVE_COST);
3879   format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %}
3880   ins_encode %{
3881     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B,
3882                           Matcher::vector_length(this, $src));
3883     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
3884                            as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg));
3885   %}
3886   ins_pipe(pipe_slow);
3887 %}
3888 
3889 instruct vmask_lasttrue_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{
3890   predicate(UseSVE > 0 &&
3891             n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
3892   match(Set dst (VectorMaskLastTrue src));
3893   effect(TEMP ptmp, KILL cr);
3894   ins_cost(5 * SVE_COST);
3895   format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %}
3896   ins_encode %{
3897     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B,
3898                           Matcher::vector_length(this, $src));
3899     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
3900                            as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg));
3901   %}
3902   ins_pipe(pipe_slow);
3903 %}
3904 
3905 // ----------------- Vector mask reductions combined with VectorMaskStore ---------------
3906 
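// These rules fuse a VectorStoreMask feeding a mask reduction, so the reduction runs
// directly on the mask vector and the intermediate boolean vector is never
// materialized; $esize only selects the lane width used for the reduction.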
3907 instruct vstoremask_truecount(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
3908   predicate(UseSVE > 0 &&
3909             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
3910   match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize)));
3911   effect(TEMP ptmp, KILL cr);
3912   ins_cost(2 * SVE_COST);
3913   format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount (sve)" %}
3914   ins_encode %{
3915     unsigned size = $esize$$constant;
3916     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
3917     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
3918     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
3919                            ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src));
3920   %}
3921   ins_pipe(pipe_slow);
3922 %}
3923 
3924 instruct vstoremask_firsttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
3925   predicate(UseSVE > 0 &&
3926             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
3927   match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize)));
3928   effect(TEMP ptmp, KILL cr);
3929   ins_cost(3 * SVE_COST);
3930   format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %}
3931   ins_encode %{
3932     unsigned size = $esize$$constant;
3933     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
3934     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
3935     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
3936                            ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src));
3937   %}
3938   ins_pipe(pipe_slow);
3939 %}
3940 
3941 instruct vstoremask_lasttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
3942   predicate(UseSVE > 0 &&
3943             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
3944   match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize)));
3945   effect(TEMP ptmp, KILL cr);
3946   ins_cost(4 * SVE_COST);
3947   format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %}
3948   ins_encode %{
3949     unsigned size = $esize$$constant;
3950     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
3951     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
3952     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
3953                            ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src));
3954   %}
3955   ins_pipe(pipe_slow);
3956 %}
3957 
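// Partial-vector variants of the combined rules above: when the vector is
// shorter than the full SVE register (length_in_bytes < MaxVectorSize),
// sve_whilelo_zr_imm first builds a predicate covering only the in-use lanes,
// and the reduction then runs under that predicate instead of ptrue.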
instruct vstoremask_truecount_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize)));
  effect(TEMP ptmp, KILL cr);
  ins_cost(3 * SVE_COST);
  format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount partial (sve)" %}
  ins_encode %{
    unsigned size = $esize$$constant;
    assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
    Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                          Matcher::vector_length(this, $src));
    __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
                           as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size);
  %}
  ins_pipe(pipe_slow);
%}

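// Unlike the truecount/lasttrue partial rules, which reuse one predicate
// register for both roles, this rule keeps the WHILELO-generated governing
// predicate (pgtmp) and the scratch predicate (ptmp) in separate registers,
// presumably because sve_vmask_reduction for firsttrue needs both live at once.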
instruct vstoremask_firsttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize)));
  effect(TEMP pgtmp, TEMP ptmp, KILL cr);
  ins_cost(4 * SVE_COST);
  format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %}
  ins_encode %{
    unsigned size = $esize$$constant;
    assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
    Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
    __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), variant,
                          Matcher::vector_length(this, $src));
    __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
                           as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size);
  %}
  ins_pipe(pipe_slow);
%}

instruct vstoremask_lasttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{
  predicate(UseSVE > 0 &&
            n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
  match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize)));
  effect(TEMP ptmp, KILL cr);
  ins_cost(5 * SVE_COST);
  format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %}
  ins_encode %{
    unsigned size = $esize$$constant;
    assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
    Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
    __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                          Matcher::vector_length(this, $src));
    __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
                           as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size);
  %}
  ins_pipe(pipe_slow);
%}