< prev index next >

src/hotspot/cpu/aarch64/aarch64_sve.ad

Print this page
*** 86,10 ***
--- 86,11 ---
  // size equals to memory element (load from/store to) size.
  opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
  
  source_hpp %{
    bool op_sve_supported(int opcode, int vlen, BasicType bt);
+   bool masked_op_sve_supported(int opcode, int vlen, BasicType bt);  // masked variant: rejects Op_VectorRearrange, otherwise forwards to op_sve_supported()
  %}
  
  source %{
  
    typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,

*** 142,24 ***
        case Op_ExtractUB:
          return false;
        // Vector API specific
        case Op_VectorLoadShuffle:
        case Op_VectorRearrange:
!         if (vlen < 4 || length_in_bytes > MaxVectorSize) {
-           return false;
-         } else {
-           return true;
-         }
        case Op_LoadVector:
        case Op_StoreVector:
          return Matcher::vector_size_supported(bt, vlen);
        default:
          break;
      }
      // By default, we only support vector operations with no less than 8 bytes and 2 elements.
      return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2;
    }
  %}
  
  definitions %{
    int_def SVE_COST             (200, 200);
  %}
--- 143,28 ---
        case Op_ExtractUB:
          return false;
        // Vector API specific
        case Op_VectorLoadShuffle:
        case Op_VectorRearrange:
!         return vlen >= 4 && length_in_bytes <= MaxVectorSize;
        case Op_LoadVector:
        case Op_StoreVector:
          return Matcher::vector_size_supported(bt, vlen);
        default:
          break;
      }
      // By default, we only support vector operations with no less than 8 bytes and 2 elements.
      return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2;
    }
+ 
+   // Support query for the masked form of a vector operation.
+   // Op_VectorRearrange is always reported as unsupported; every other opcode
+   // gets the same answer as op_sve_supported() for the given vlen and bt.
+   bool masked_op_sve_supported(int opcode, int vlen, BasicType bt) {
+     return opcode != Op_VectorRearrange && op_sve_supported(opcode, vlen, bt);
+   }
+ 
  %}
  
  definitions %{
    int_def SVE_COST             (200, 200);  // cost unit that the instruct rules scale in ins_cost(...)
  %}

*** 292,50 ***
  
  // Predicated vector load/store, based on the vector length of the node.
  // Only load/store values in the range of the memory_size. This is needed
  // when the memory_size is lower than the hardware supported max vector size.
  // And this might happen for Vector API mask vector load/store.
! instruct loadV_partial(vReg dst, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{
    predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 &&
              n->as_LoadVector()->memory_size() < MaxVectorSize);
    match(Set dst (LoadVector mem));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(6 * SVE_COST);
!   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
!             "sve_ldr $dst, $pTmp, $mem\t# load vector predicated" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this);
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt),
                            Matcher::vector_length(this));
      FloatRegister dst_reg = as_FloatRegister($dst$$reg);
      loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg,
!                           as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct storeV_partial(vReg src, vmemA mem, pRegGov pTmp, rFlagsReg cr) %{
    predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 &&
              n->as_StoreVector()->memory_size() < MaxVectorSize);
    match(Set mem (StoreVector mem src));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(5 * SVE_COST);
!   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
!             "sve_str $src, $pTmp, $mem\t# store vector predicated" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src);
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ elemType_to_regVariant(bt),
                            Matcher::vector_length(this, $src));
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg,
!                           as_PRegister($pTmp$$reg), bt, bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector reinterpret
  
  instruct reinterpret(vReg dst) %{
    predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() ==
                            n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src == dst
--- 297,256 ---
  
  // Predicated vector load/store, based on the vector length of the node.
  // Only load/store values in the range of the memory_size. This is needed
  // when the memory_size is lower than the hardware supported max vector size.
  // And this might happen for Vector API mask vector load/store.
! instruct loadV_partial(vReg dst, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{  // LoadVector whose memory_size() is above 16 bytes but below MaxVectorSize
    predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() > 16 &&
              n->as_LoadVector()->memory_size() < MaxVectorSize);
    match(Set dst (LoadVector mem));
!   effect(TEMP pgtmp, KILL cr);  // $pgtmp is a scratch predicate; the flags register is declared killed
    ins_cost(6 * SVE_COST);
!   format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t"
!             "sve_ldr $dst, $pgtmp, $mem\t# load vector partial" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this);
!     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),  // build the predicate from this node's element type and vector length
                            Matcher::vector_length(this));
      FloatRegister dst_reg = as_FloatRegister($dst$$reg);
      loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg,  // second argument is false here and true in storeV_partial -- presumably an is-store flag, confirm
!                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct storeV_partial(vReg src, vmemA mem, pRegGov pgtmp, rFlagsReg cr) %{  // StoreVector whose memory_size() is above 16 bytes but below MaxVectorSize
    predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() > 16 &&
              n->as_StoreVector()->memory_size() < MaxVectorSize);
    match(Set mem (StoreVector mem src));
!   effect(TEMP pgtmp, KILL cr);  // $pgtmp is a scratch predicate; the flags register is declared killed
    ins_cost(5 * SVE_COST);
!   format %{ "sve_whilelo_zr_imm $pgtmp, vector_length\n\t"
!             "sve_str $src, $pgtmp, $mem\t# store vector partial" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src);  // element type and length are taken from the $src operand
!     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),
                            Matcher::vector_length(this, $src));
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, src_reg,  // second argument is true here and false in loadV_partial -- presumably an is-store flag, confirm
!                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
+                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector load/store - predicated
+ 
+ instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{  // LoadVectorMasked at full width: memory_size() == MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->as_LoadVector()->memory_size() == MaxVectorSize);
+   match(Set dst (LoadVectorMasked mem pg));
+   ins_cost(4 * SVE_COST);
+   format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg),  // the incoming mask $pg is used directly as the predicate; no temporary is needed
+                           as_PRegister($pg$$reg), bt, bt, $mem->opcode(),
+                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct loadV_masked_partial(vReg dst, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{  // LoadVectorMasked narrower than MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->as_LoadVector()->memory_size() < MaxVectorSize);
+   match(Set dst (LoadVectorMasked mem pg));
+   effect(TEMP pgtmp, KILL cr);  // $pgtmp is a scratch predicate; the flags register is declared killed
+   ins_cost(6 * SVE_COST);
+   format %{ "sve_ldr $dst, $pg, $mem\t# load vector predicated partial (sve)" %}
+   ins_encode %{  // NOTE: the format string above lists only the final load; the encoding also emits the whilelo and the predicate AND
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),  // predicate built from this node's vector length
+                           Matcher::vector_length(this));
+     __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg),  // combine the length predicate with the caller's mask; $pg is passed for both source operands
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($dst$$reg),
+                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
+                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct storeV_masked(vReg src, vmemA mem, pRegGov pg) %{  // StoreVectorMasked at full width: memory_size() == MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->as_StoreVector()->memory_size() == MaxVectorSize);
+   match(Set mem (StoreVectorMasked mem (Binary src pg)));  // value and mask arrive paired in a Binary node
+   ins_cost(4 * SVE_COST);
+   format %{ "sve_str $mem, $pg, $src\t# store vector predicated (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg),  // the incoming mask $pg is used directly as the predicate
+                           as_PRegister($pg$$reg), bt, bt, $mem->opcode(),
+                           as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct storeV_masked_partial(vReg src, vmemA mem, pRegGov pg, pRegGov pgtmp, rFlagsReg cr) %{  // StoreVectorMasked narrower than MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->as_StoreVector()->memory_size() < MaxVectorSize);
+   match(Set mem (StoreVectorMasked mem (Binary src pg)));
+   effect(TEMP pgtmp, KILL cr);  // $pgtmp is a scratch predicate; the flags register is declared killed
+   ins_cost(6 * SVE_COST);
+   format %{ "sve_str $mem, $pg, $src\t# store vector predicated partial (sve)" %}
+   ins_encode %{  // NOTE: the format string above lists only the final store; the encoding also emits the whilelo and the predicate AND
+     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ elemType_to_regVariant(bt),  // predicate built from the vector length of $src
+                           Matcher::vector_length(this, $src));
+     __ sve_and(as_PRegister($pgtmp$$reg), as_PRegister($pgtmp$$reg),  // combine the length predicate with the caller's mask; $pg is passed for both source operands
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($src$$reg),
+                           as_PRegister($pgtmp$$reg), bt, bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // maskAll
+ 
+ instruct vmaskAll_immI(pRegGov dst, immI src) %{  // MaskAll of a compile-time int constant
+   predicate(UseSVE > 0);
+   match(Set dst (MaskAll src));
+   ins_cost(SVE_COST);
+   format %{ "sve_ptrue/sve_pfalse $dst\t# mask all (sve) (B/H/S)" %}
+   ins_encode %{
+     int con = (int)$src$$constant;
+     if (con == 0) {  // constant 0 -> pfalse
+       __ sve_pfalse(as_PRegister($dst$$reg));
+     } else {  // the only other accepted constant is -1 -> ptrue at this node's element size
+       assert(con == -1, "invalid constant value for mask");
+       BasicType bt = Matcher::vector_element_basic_type(this);
+       __ sve_ptrue(as_PRegister($dst$$reg), __ elemType_to_regVariant(bt));
+     }
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmaskAllI(pRegGov dst, iRegIorL2I src, vReg tmp, rFlagsReg cr) %{  // MaskAll of an int value held in a register
+   predicate(UseSVE > 0);
+   match(Set dst (MaskAll src));
+   effect(TEMP tmp, KILL cr);  // $tmp is a scratch vector register; the flags register is declared killed
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_dup $tmp, $src\n\t"
+             "sve_cmpne $dst, $tmp, 0\t# mask all (sve) (B/H/S)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+     __ sve_dup(as_FloatRegister($tmp$$reg), size, as_Register($src$$reg));  // copy the scalar into $tmp
+     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0);  // $dst = result of comparing $tmp != 0 under ptrue
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmaskAll_immL(pRegGov dst, immL src) %{  // MaskAll of a compile-time long constant
+   predicate(UseSVE > 0);
+   match(Set dst (MaskAll src));
+   ins_cost(SVE_COST);
+   format %{ "sve_ptrue/sve_pfalse $dst\t# mask all (sve) (D)" %}
+   ins_encode %{
+     long con = (long)$src$$constant;
+     if (con == 0) {  // constant 0 -> pfalse
+       __ sve_pfalse(as_PRegister($dst$$reg));
+     } else {  // the only other accepted constant is -1 -> ptrue at this node's element size
+       assert(con == -1, "invalid constant value for mask");
+       BasicType bt = Matcher::vector_element_basic_type(this);
+       __ sve_ptrue(as_PRegister($dst$$reg), __ elemType_to_regVariant(bt));
+     }
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmaskAllL(pRegGov dst, iRegL src, vReg tmp, rFlagsReg cr) %{  // MaskAll of a long value held in a register
+   predicate(UseSVE > 0);
+   match(Set dst (MaskAll src));
+   effect(TEMP tmp, KILL cr);  // $tmp is a scratch vector register; the flags register is declared killed
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_dup $tmp, $src\n\t"
+             "sve_cmpne $dst, $tmp, 0\t# mask all (sve) (D)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+     __ sve_dup(as_FloatRegister($tmp$$reg), size, as_Register($src$$reg));  // copy the scalar into $tmp
+     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0);  // $dst = result of comparing $tmp != 0 under ptrue
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // mask logical and/or/xor
+ 
+ instruct vmask_and(pRegGov pd, pRegGov pn, pRegGov pm) %{  // AndVMask of two predicate registers
+   predicate(UseSVE > 0);
+   match(Set pd (AndVMask pn pm));
+   ins_cost(SVE_COST);
+   format %{ "sve_and $pd, $pn, $pm\t# predicate (sve)" %}
+   ins_encode %{
+     __ sve_and(as_PRegister($pd$$reg), ptrue,  // ptrue is passed in the second position, ahead of the two sources -- presumably the governing predicate, confirm
+                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmask_or(pRegGov pd, pRegGov pn, pRegGov pm) %{  // OrVMask of two predicate registers
+   predicate(UseSVE > 0);
+   match(Set pd (OrVMask pn pm));
+   ins_cost(SVE_COST);
+   format %{ "sve_orr $pd, $pn, $pm\t# predicate (sve)" %}
+   ins_encode %{
+     __ sve_orr(as_PRegister($pd$$reg), ptrue,  // ptrue is passed in the second position, ahead of the two sources -- presumably the governing predicate, confirm
+                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmask_xor(pRegGov pd, pRegGov pn, pRegGov pm) %{  // XorVMask of two predicate registers
+   predicate(UseSVE > 0);
+   match(Set pd (XorVMask pn pm));
+   ins_cost(SVE_COST);
+   format %{ "sve_eor $pd, $pn, $pm\t# predicate (sve)" %}
+   ins_encode %{
+     __ sve_eor(as_PRegister($pd$$reg), ptrue,  // ptrue is passed in the second position, ahead of the two sources -- presumably the governing predicate, confirm
+                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // mask logical and_not
+ 
+ instruct vmask_and_notI(pRegGov pd, pRegGov pn, pRegGov pm, immI_M1 m1) %{  // folds pn AND (pm XOR MaskAll(-1)) into a single sve_bic
+   predicate(UseSVE > 0);
+   match(Set pd (AndVMask pn (XorVMask pm (MaskAll m1))));  // $m1 only constrains the pattern to the int constant -1; it is not used by the encoding
+   ins_cost(SVE_COST);
+   format %{ "sve_bic $pd, $pn, $pm\t# predicate (sve) (B/H/S)" %}
+   ins_encode %{
+     __ sve_bic(as_PRegister($pd$$reg), ptrue,  // ptrue is passed in the second position, ahead of the two sources -- presumably the governing predicate, confirm
+                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmask_and_notL(pRegGov pd, pRegGov pn, pRegGov pm, immL_M1 m1) %{  // folds pn AND (pm XOR MaskAll(-1L)) into a single sve_bic
+   predicate(UseSVE > 0);
+   match(Set pd (AndVMask pn (XorVMask pm (MaskAll m1))));  // $m1 only constrains the pattern to the long constant -1; it is not used by the encoding
+   ins_cost(SVE_COST);
+   format %{ "sve_bic $pd, $pn, $pm\t# predicate (sve) (D)" %}
+   ins_encode %{
+     __ sve_bic(as_PRegister($pd$$reg), ptrue,  // ptrue is passed in the second position, ahead of the two sources -- presumably the governing predicate, confirm
+                as_PRegister($pn$$reg), as_PRegister($pm$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector reinterpret
  
  instruct reinterpret(vReg dst) %{
    predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() ==
                            n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src == dst

*** 346,37 ***
      // empty
    %}
    ins_pipe(pipe_class_empty);
  %}
  
! instruct reinterpretResize(vReg dst, vReg src, pRegGov pTmp, rFlagsReg cr) %{
    predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() !=
                            n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src != dst
    match(Set dst (VectorReinterpret src));
!   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
    ins_cost(3 * SVE_COST);
    format %{ "reinterpretResize $dst, $src\t# vector (sve)" %}
    ins_encode %{
      uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
      uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
      uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ?
                                    length_in_bytes_src : length_in_bytes_dst;
      assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
             "invalid vector length");
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ B, length_in_bytes_resize);
      __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0);
!     __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pTmp$$reg),
                 as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector abs
  
  instruct vabsB(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
    match(Set dst (AbsVB src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (B)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ B,
--- 557,71 ---
      // empty
    %}
    ins_pipe(pipe_class_empty);
  %}
  
! instruct reinterpretResize(vReg dst, vReg src, pRegGov pgtmp, rFlagsReg cr) %{  // VectorReinterpret where source and destination byte lengths differ
    predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() !=
                            n->in(1)->bottom_type()->is_vect()->length_in_bytes());  // src != dst
    match(Set dst (VectorReinterpret src));
!   effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);  // $dst is written (zeroed) before $src is read below, hence TEMP_DEF
    ins_cost(3 * SVE_COST);
    format %{ "reinterpretResize $dst, $src\t# vector (sve)" %}
    ins_encode %{
      uint length_in_bytes_src = Matcher::vector_length_in_bytes(this, $src);
      uint length_in_bytes_dst = Matcher::vector_length_in_bytes(this);
      uint length_in_bytes_resize = length_in_bytes_src < length_in_bytes_dst ?  // the smaller of the two byte lengths
                                    length_in_bytes_src : length_in_bytes_dst;
      assert(length_in_bytes_src <= MaxVectorSize && length_in_bytes_dst <= MaxVectorSize,
             "invalid vector length");
!     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B, length_in_bytes_resize);  // byte-granular predicate sized by the smaller length
      __ sve_dup(as_FloatRegister($dst$$reg), __ B, 0);  // zero-fill $dst first
!     __ sve_sel(as_FloatRegister($dst$$reg), __ B, as_PRegister($pgtmp$$reg),  // select between $src and the zeroed $dst under $pgtmp
                 as_FloatRegister($src$$reg), as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector mask reinterpret
+ 
+ instruct vmask_reinterpret_same_esize(pRegGov dst_src) %{  // mask reinterpret with equal lane count and equal byte length: emits no code
+   predicate(UseSVE > 0 &&
+             n->as_Vector()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
+             n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
+   match(Set dst_src (VectorReinterpret dst_src));  // input and output share one predicate register
+   ins_cost(0);
+   format %{ "# vmask_reinterpret $dst_src\t# do nothing" %}
+   ins_encode %{
+     // empty
+   %}
+   ins_pipe(pipe_class_empty);
+ %}
+ 
+ instruct vmask_reinterpret_diff_esize(pRegGov dst, pRegGov src, vReg tmp, rFlagsReg cr) %{  // mask reinterpret with equal byte length but a different lane count
+   predicate(UseSVE > 0 &&
+             n->as_Vector()->length() != n->in(1)->bottom_type()->is_vect()->length() &&
+             n->as_Vector()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
+   match(Set dst (VectorReinterpret src));
+   effect(TEMP tmp, KILL cr);  // $tmp is a scratch vector register; the flags register is declared killed
+   ins_cost(2 * SVE_COST);
+   format %{ "# vmask_reinterpret $dst, $src\t# vector (sve)" %}
+   ins_encode %{
+     BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
+     Assembler::SIMD_RegVariant from_size = __ elemType_to_regVariant(from_bt);
+     BasicType to_bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
+     __ sve_cpy(as_FloatRegister($tmp$$reg), from_size, as_PRegister($src$$reg), -1, false);  // write -1 into $tmp under $src at the source element size; trailing false presumably selects zeroing of other lanes -- confirm
+     __ sve_cmp(Assembler::EQ, as_PRegister($dst$$reg), to_size, ptrue, as_FloatRegister($tmp$$reg), -1);  // rebuild the mask by comparing $tmp == -1 at the destination element size
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector abs
  
  instruct vabsB(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             !n->as_Vector()->is_predicated_vector());
    match(Set dst (AbsVB src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (B)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ B,

*** 385,11 ***
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsS(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
    match(Set dst (AbsVS src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (H)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ H,
--- 630,11 ---
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsS(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             !n->as_Vector()->is_predicated_vector());
    match(Set dst (AbsVS src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (H)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ H,

*** 398,11 ***
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsI(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
    match(Set dst (AbsVI src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ S,
--- 643,11 ---
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsI(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             !n->as_Vector()->is_predicated_vector());
    match(Set dst (AbsVI src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ S,

*** 411,11 ***
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsL(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (AbsVL src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ D,
--- 656,11 ---
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsL(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             !n->as_Vector()->is_predicated_vector());
    match(Set dst (AbsVL src));
    ins_cost(SVE_COST);
    format %{ "sve_abs $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_abs(as_FloatRegister($dst$$reg), __ D,

*** 424,11 ***
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsF(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (AbsVF src));
    ins_cost(SVE_COST);
    format %{ "sve_fabs $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_fabs(as_FloatRegister($dst$$reg), __ S,
--- 669,11 ---
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsF(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             !n->as_Vector()->is_predicated_vector());
    match(Set dst (AbsVF src));
    ins_cost(SVE_COST);
    format %{ "sve_fabs $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_fabs(as_FloatRegister($dst$$reg), __ S,

*** 437,21 ***
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsD(vReg dst, vReg src) %{
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (AbsVD src));
    ins_cost(SVE_COST);
    format %{ "sve_fabs $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_fabs(as_FloatRegister($dst$$reg), __ D,
           ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector add
  
  instruct vaddB(vReg dst, vReg src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst (AddVB src1 src2));
--- 682,101 ---
    ins_pipe(pipe_slow);
  %}
  
  instruct vabsD(vReg dst, vReg src) %{  // unpredicated AbsVD
    predicate(UseSVE > 0 &&
!             !n->as_Vector()->is_predicated_vector());  // skip nodes flagged as predicated; vabsD_masked matches the AbsVD form that carries a mask input
    match(Set dst (AbsVD src));
    ins_cost(SVE_COST);
    format %{ "sve_fabs $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_fabs(as_FloatRegister($dst$$reg), __ D,
           ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector abs - predicated
+ 
+ instruct vabsB_masked(vReg dst_src, pRegGov pg) %{  // AbsVB with a mask input; operates in place on $dst_src
+   predicate(UseSVE > 0);
+   match(Set dst_src (AbsVB dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (B)" %}
+   ins_encode %{
+     __ sve_abs(as_FloatRegister($dst_src$$reg), __ B,  // same register is destination and source; $pg takes the predicate slot
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vabsS_masked(vReg dst_src, pRegGov pg) %{  // AbsVS with a mask input; operates in place on $dst_src
+   predicate(UseSVE > 0);
+   match(Set dst_src (AbsVS dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (H)" %}
+   ins_encode %{
+     __ sve_abs(as_FloatRegister($dst_src$$reg), __ H,  // same register is destination and source; $pg takes the predicate slot
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vabsI_masked(vReg dst_src, pRegGov pg) %{  // AbsVI with a mask input; operates in place on $dst_src
+   predicate(UseSVE > 0);
+   match(Set dst_src (AbsVI dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_abs(as_FloatRegister($dst_src$$reg), __ S,  // same register is destination and source; $pg takes the predicate slot
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vabsL_masked(vReg dst_src, pRegGov pg) %{  // AbsVL with a mask input; operates in place on $dst_src
+   predicate(UseSVE > 0);
+   match(Set dst_src (AbsVL dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_abs $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_abs(as_FloatRegister($dst_src$$reg), __ D,  // same register is destination and source; $pg takes the predicate slot
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vabsF_masked(vReg dst_src, pRegGov pg) %{  // AbsVF with a mask input; operates in place on $dst_src
+   predicate(UseSVE > 0);
+   match(Set dst_src (AbsVF dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fabs $dst_src, $pg, $dst_src\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fabs(as_FloatRegister($dst_src$$reg), __ S,  // same register is destination and source; $pg takes the predicate slot
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vabsD_masked(vReg dst_src, pRegGov pg) %{  // AbsVD with a mask input; operates in place on $dst_src
+   predicate(UseSVE > 0);
+   match(Set dst_src (AbsVD dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fabs $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fabs(as_FloatRegister($dst_src$$reg), __ D,  // same register is destination and source; $pg takes the predicate slot
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector add
  
  instruct vaddB(vReg dst, vReg src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst (AddVB src1 src2));

*** 528,10 ***
--- 853,90 ---
           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector add - predicated
+ 
+ instruct vaddB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // AddVB with a mask input; $dst_src1 is both first operand and result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (AddVB (Binary dst_src1 src2) pg));  // the two addends are paired in a Binary node, the mask is the second input
+   ins_cost(SVE_COST);
+   format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+   ins_encode %{
+     __ sve_add(as_FloatRegister($dst_src1$$reg), __ B,  // $dst_src1 is passed once; the format string shows it as destination and first source
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vaddS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // AddVS with a mask input; $dst_src1 is both first operand and result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (AddVS (Binary dst_src1 src2) pg));  // the two addends are paired in a Binary node, the mask is the second input
+   ins_cost(SVE_COST);
+   format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+   ins_encode %{
+     __ sve_add(as_FloatRegister($dst_src1$$reg), __ H,  // $dst_src1 is passed once; the format string shows it as destination and first source
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vaddI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // AddVI with a mask input; $dst_src1 is both first operand and result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (AddVI (Binary dst_src1 src2) pg));  // the two addends are paired in a Binary node, the mask is the second input
+   ins_cost(SVE_COST);
+   format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_add(as_FloatRegister($dst_src1$$reg), __ S,  // $dst_src1 is passed once; the format string shows it as destination and first source
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vaddL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // AddVL with a mask input; $dst_src1 is both first operand and result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (AddVL (Binary dst_src1 src2) pg));  // the two addends are paired in a Binary node, the mask is the second input
+   ins_cost(SVE_COST);
+   format %{ "sve_add $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_add(as_FloatRegister($dst_src1$$reg), __ D,  // $dst_src1 is passed once; the format string shows it as destination and first source
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vaddF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // AddVF with a mask input; $dst_src1 is both first operand and result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (AddVF (Binary dst_src1 src2) pg));  // the two addends are paired in a Binary node, the mask is the second input
+   ins_cost(SVE_COST);
+   format %{ "sve_fadd $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fadd(as_FloatRegister($dst_src1$$reg), __ S,  // $dst_src1 is passed once; the format string shows it as destination and first source
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vaddD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // AddVD with a mask input; $dst_src1 is both first operand and result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (AddVD (Binary dst_src1 src2) pg));  // the two addends are paired in a Binary node, the mask is the second input
+   ins_cost(SVE_COST);
+   format %{ "sve_fadd $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fadd(as_FloatRegister($dst_src1$$reg), __ D,  // $dst_src1 is passed once; the format string shows it as destination and first source
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector and
  
  instruct vand(vReg dst, vReg src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst (AndV src1 src2));

*** 573,48 ***
           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector not
  
! instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{
    predicate(UseSVE > 0);
!   match(Set dst (XorV src (ReplicateB m1)));
-   match(Set dst (XorV src (ReplicateS m1)));
-   match(Set dst (XorV src (ReplicateI m1)));
    ins_cost(SVE_COST);
!   format %{ "sve_not $dst, $src\t# vector (sve) B/H/S" %}
    ins_encode %{
!     __ sve_not(as_FloatRegister($dst$$reg), __ D,
!                ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
    predicate(UseSVE > 0);
!   match(Set dst (XorV src (ReplicateL m1)));
    ins_cost(SVE_COST);
!   format %{ "sve_not $dst, $src\t# vector (sve) D" %}
    ins_encode %{
!     __ sve_not(as_FloatRegister($dst$$reg), __ D,
!                ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  
! // vector and_not
- 
- instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
    predicate(UseSVE > 0);
!   match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
-   match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
-   match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
    ins_cost(SVE_COST);
!   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) B/H/S" %}
    ins_encode %{
      __ sve_bic(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src1$$reg),
                 as_FloatRegister($src2$$reg));
    %}
--- 978,98 ---
           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector and - predicated
  
! instruct vand_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // AndV with a mask input; $dst_src1 is both first operand and result
    predicate(UseSVE > 0);
!   match(Set dst_src1 (AndV (Binary dst_src1 src2) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_and $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this);  // element size is taken from the node, not hard-coded per rule
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+     __ sve_and(as_FloatRegister($dst_src1$$reg), size,
+           as_PRegister($pg$$reg),
+           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector or - predicated
+ 
+ instruct vor_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated bitwise OR: the mask pg is the second input of the OrV
    // node, and dst_src1 is both the first operand and the destination.
    predicate(UseSVE > 0);
!   match(Set dst_src1 (OrV (Binary dst_src1 src2) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_orr $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %}
    ins_encode %{
      // Element size is derived from this node's vector element type.
!     BasicType bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+     __ sve_orr(as_FloatRegister($dst_src1$$reg), size,
+           as_PRegister($pg$$reg),
+           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector xor - predicated
  
! instruct vxor_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated bitwise XOR: the mask pg is the second input of the XorV
    // node, and dst_src1 is both the first operand and the destination.
    predicate(UseSVE > 0);
!   match(Set dst_src1 (XorV (Binary dst_src1 src2) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_eor $dst_src1, $pg, $dst_src1, $src2\t # vector (sve)" %}
+   ins_encode %{
      // Element size is derived from this node's vector element type.
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+     __ sve_eor(as_FloatRegister($dst_src1$$reg), size,
+           as_PRegister($pg$$reg),
+           as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector not
+ 
+ instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{
    // Bitwise NOT: matches XorV of src with the replicated m1 constant
    // (byte, short and int replicate forms) and emits a single sve_not.
+   predicate(UseSVE > 0);
+   match(Set dst (XorV src (ReplicateB m1)));
+   match(Set dst (XorV src (ReplicateS m1)));
+   match(Set dst (XorV src (ReplicateI m1)));
+   ins_cost(SVE_COST);
+   format %{ "sve_not $dst, $src\t# vector (sve) B/H/S" %}
+   ins_encode %{
      // The D register variant is passed for all three matched element sizes.
+     __ sve_not(as_FloatRegister($dst$$reg), __ D,
+                ptrue, as_FloatRegister($src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
    // Long-element counterpart of vnotI: XorV of src with ReplicateL m1
    // is emitted as one sve_not governed by ptrue.
+   predicate(UseSVE > 0);
+   match(Set dst (XorV src (ReplicateL m1)));
+   ins_cost(SVE_COST);
+   format %{ "sve_not $dst, $src\t# vector (sve) D" %}
+   ins_encode %{
+     __ sve_not(as_FloatRegister($dst$$reg), __ D,
+                ptrue, as_FloatRegister($src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector and_not
+ 
+ instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
+   predicate(UseSVE > 0);
+   match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
+   match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
+   match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
+   ins_cost(SVE_COST);
+   format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) B/H/S" %}
    ins_encode %{
      __ sve_bic(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src1$$reg),
                 as_FloatRegister($src2$$reg));
    %}

*** 632,11 ***
                 as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
- 
  // vector float div
  
  instruct vdivF(vReg dst_src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst_src1 (DivVF dst_src1 src2));
--- 1087,10 ---

*** 659,10 ***
--- 1113,38 ---
           ptrue, as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector float div - predicated
+ 
+ instruct vfdivF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated float divide: dst_src1 is dividend and destination, src2 is
    // the divisor, and pg (second input of DivVF) governs the instruction.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (DivVF (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fdiv $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vfdivD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated double divide: same shape as vfdivF_masked but matches
    // DivVD and encodes with the D register variant.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (DivVD (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fdiv $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector min/max
  
  instruct vmin(vReg dst_src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst_src1 (MinV dst_src1 src2));

*** 673,11 ***
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      if (is_floating_point_type(bt)) {
        __ sve_fmin(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      } else {
!       assert(is_integral_type(bt), "Unsupported type");
        __ sve_smin(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      }
    %}
    ins_pipe(pipe_slow);
--- 1155,11 ---
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      if (is_floating_point_type(bt)) {
        __ sve_fmin(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      } else {
!       assert(is_integral_type(bt), "unsupported type");
        __ sve_smin(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      }
    %}
    ins_pipe(pipe_slow);

*** 693,18 ***
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      if (is_floating_point_type(bt)) {
        __ sve_fmax(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      } else {
!       assert(is_integral_type(bt), "Unsupported type");
        __ sve_smax(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      }
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector fmla
  
  // dst_src1 = dst_src1 + src2 * src3
  instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
    predicate(UseFMA && UseSVE > 0);
--- 1175,60 ---
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      if (is_floating_point_type(bt)) {
        __ sve_fmax(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      } else {
!       assert(is_integral_type(bt), "unsupported type");
        __ sve_smax(as_FloatRegister($dst_src1$$reg), size,
                    ptrue, as_FloatRegister($src2$$reg));
      }
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector min/max - predicated
+ 
+ instruct vmin_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated minimum for all element types; pg is the second input of
    // MinV and dst_src1 is both first operand and destination.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MinV (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_min $dst_src1, $pg, $dst_src1, $src2\t# vector (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Floating-point elements use sve_fmin; integral elements use the signed sve_smin.
+     if (is_floating_point_type(bt)) {
+       __ sve_fmin(as_FloatRegister($dst_src1$$reg), size,
+                   as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+     } else {
+       assert(is_integral_type(bt), "unsupported type");
+       __ sve_smin(as_FloatRegister($dst_src1$$reg), size,
+                   as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+     }
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmax_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated maximum for all element types; pg is the second input of
    // MaxV and dst_src1 is both first operand and destination.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MaxV (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_max $dst_src1, $pg, $dst_src1, $src2\t# vector (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Floating-point elements use sve_fmax; integral elements use the signed sve_smax.
+     if (is_floating_point_type(bt)) {
+       __ sve_fmax(as_FloatRegister($dst_src1$$reg), size,
+                   as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+     } else {
+       assert(is_integral_type(bt), "unsupported type");
+       __ sve_smax(as_FloatRegister($dst_src1$$reg), size,
+                   as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+     }
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector fmla
  
  // dst_src1 = dst_src1 + src2 * src3
  instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
    predicate(UseFMA && UseSVE > 0);

*** 729,10 ***
--- 1253,38 ---
           ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector fmla - predicated (emitted as sve_fmad: dst_src1 is a multiplicand)
+ 
+ // dst_src1 = dst_src1 * src2 + src3
+ instruct vfmlaF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
    // Predicated float fused multiply-add, only selected when UseFMA is set.
    // The mask pg is paired with src3 in the second Binary of the FmaVF node.
+   predicate(UseFMA && UseSVE > 0);
+   match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 pg)));
+   ins_cost(SVE_COST);
+   format %{ "sve_fmad $dst_src1, $pg, $src2, $src3\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fmad(as_FloatRegister($dst_src1$$reg), __ S, as_PRegister($pg$$reg),
+          as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // dst_src1 = dst_src1 * src2 + src3
+ instruct vfmlaD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
    // Predicated double fused multiply-add, only selected when UseFMA is set.
    // The mask pg is paired with src3 in the second Binary of the FmaVD node.
+   predicate(UseFMA && UseSVE > 0);
+   match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 pg)));
+   ins_cost(SVE_COST);
+   format %{ "sve_fmad $dst_src1, $pg, $src2, $src3\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fmad(as_FloatRegister($dst_src1$$reg), __ D, as_PRegister($pg$$reg),
+          as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector fmls
  
  // dst_src1 = dst_src1 + -src2 * src3
  // dst_src1 = dst_src1 + src2 * -src3
  instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{

*** 937,11 ***
        ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
- 
  // vector mul
  
  instruct vmulB(vReg dst_src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst_src1 (MulVB dst_src1 src2));
--- 1489,10 ---

*** 1014,14 ***
           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector fneg
  
  instruct vnegF(vReg dst, vReg src) %{
!   predicate(UseSVE > 0);
    match(Set dst (NegVF src));
    ins_cost(SVE_COST);
    format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_fneg(as_FloatRegister($dst$$reg), __ S,
--- 1565,95 ---
           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector mul - predicated
+ 
+ instruct vmulB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated byte multiply: dst_src1 is first operand and destination;
    // pg (second input of MulVB) governs the sve_mul.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MulVB (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+   ins_encode %{
+     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ B,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmulS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated short multiply: matches MulVS and encodes sve_mul with the
    // H register variant under predicate pg.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MulVS (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+   ins_encode %{
+     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmulI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated int multiply: matches MulVI and encodes sve_mul with the
    // S register variant under predicate pg.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MulVI (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmulL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated long multiply: matches MulVL and encodes sve_mul with the
    // D register variant under predicate pg.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MulVL (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_mul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmulF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated float multiply: matches MulVF and encodes sve_fmul with the
    // S register variant under predicate pg.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MulVF (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fmul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fmul(as_FloatRegister($dst_src1$$reg), __ S,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vmulD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated double multiply: matches MulVD and encodes sve_fmul with the
    // D register variant under predicate pg.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (MulVD (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fmul $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fmul(as_FloatRegister($dst_src1$$reg), __ D,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector fneg
  
  instruct vnegF(vReg dst, vReg src) %{
!   predicate(UseSVE > 0 &&
+             !n->as_Vector()->is_predicated_vector());
    match(Set dst (NegVF src));
    ins_cost(SVE_COST);
    format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_fneg(as_FloatRegister($dst$$reg), __ S,

*** 1029,21 ***
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vnegD(vReg dst, vReg src) %{
    // Double negate of src into dst, governed by ptrue. This is the pre-change
    // form, selected whenever UseSVE > 0.
!   predicate(UseSVE > 0);
    match(Set dst (NegVD src));
    ins_cost(SVE_COST);
    format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
           ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // popcount vector
  
  instruct vpopcountI(vReg dst, vReg src) %{
    predicate(UseSVE > 0);
    match(Set dst (PopCountVI src));
--- 1661,50 ---
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vnegD(vReg dst, vReg src) %{
    // Double negate of src into dst, governed by ptrue. The predicate rejects
    // nodes flagged as predicated vectors so this rule only handles unmasked NegVD.
!   predicate(UseSVE > 0 &&
+             !n->as_Vector()->is_predicated_vector());
    match(Set dst (NegVD src));
    ins_cost(SVE_COST);
    format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
           ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector fneg - predicated
+ 
+ instruct vnegF_masked(vReg dst_src, pRegGov pg) %{
    // Predicated float negate performed in place: dst_src is both source and
    // destination, and pg is the second input of the NegVF node.
+   predicate(UseSVE > 0);
+   match(Set dst_src (NegVF dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fneg $dst_src, $pg, $dst_src\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fneg(as_FloatRegister($dst_src$$reg), __ S,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vnegD_masked(vReg dst_src, pRegGov pg) %{
    // Predicated double negate performed in place: dst_src is both source and
    // destination, and pg is the second input of the NegVD node.
+   predicate(UseSVE > 0);
+   match(Set dst_src (NegVD dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fneg $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fneg(as_FloatRegister($dst_src$$reg), __ D,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // popcount vector
  
  instruct vpopcountI(vReg dst, vReg src) %{
    predicate(UseSVE > 0);
    match(Set dst (PopCountVI src));

*** 1054,991 ***
    ins_pipe(pipe_slow);
  %}
  
  // vector mask compare
  
! instruct vmaskcmp(vReg dst, vReg src1, vReg src2, immI cond, pRegGov pTmp, rFlagsReg cr) %{
    // Vector compare producing a vector-register mask (pre-change form):
    // compares into the temporary predicate pTmp, then materializes -1 into
    // dst under that predicate. The flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
!   effect(TEMP pTmp, KILL cr);
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_cmp $pTmp, $src1, $src2\n\t"
-             "sve_cpy $dst, $pTmp, -1\t# vector mask cmp (sve)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this);
!     __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src1$$reg),
                     as_FloatRegister($src2$$reg), (int)$cond$$constant);
-     __ sve_cpy(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
-                as_PRegister($pTmp$$reg), -1, false);
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector blend
- 
- instruct vblend(vReg dst, vReg src1, vReg src2, vReg src3, pRegGov pTmp, rFlagsReg cr) %{
    // Vector blend with a vector-register mask src3 (pre-change form): src3 is
    // compared for equality with -1 into pTmp, then sve_sel chooses between
    // src2 and src1 under pTmp.
    predicate(UseSVE > 0);
!   match(Set dst (VectorBlend (Binary src1 src2) src3));
!   effect(TEMP pTmp, KILL cr);
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_cmpeq $pTmp, $src3, -1\n\t"
-             "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
    ins_encode %{
!     Assembler::SIMD_RegVariant size =
!       __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
-                ptrue, as_FloatRegister($src3$$reg), -1);
-     __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg),
-                as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector blend with compare
  
! instruct vblend_maskcmp(vReg dst, vReg src1, vReg src2, vReg src3,
-                         vReg src4, pRegGov pTmp, immI cond, rFlagsReg cr) %{
    // Fused form: a VectorBlend whose mask input is a VectorMaskCmp of
    // src3/src4. The compare result stays in pTmp and feeds sve_sel directly.
    predicate(UseSVE > 0);
!   match(Set dst (VectorBlend (Binary src1 src2) (VectorMaskCmp (Binary src3 src4) cond)));
!   effect(TEMP pTmp, KILL cr);
!   ins_cost(2 * SVE_COST);
-   format %{ "sve_cmp $pTmp, $src3, $src4\t# vector cmp (sve)\n\t"
-             "sve_sel $dst, $pTmp, $src2, $src1\t# vector blend (sve)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this);
!     __ sve_compare(as_PRegister($pTmp$$reg), bt, ptrue, as_FloatRegister($src3$$reg),
!                    as_FloatRegister($src4$$reg), (int)$cond$$constant);
!     __ sve_sel(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
-                as_PRegister($pTmp$$reg), as_FloatRegister($src2$$reg),
-                as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector load mask
  
! instruct vloadmaskB(vReg dst, vReg src) %{
    // VectorLoadMask for byte-element results (pre-change form): a single
    // byte-wise negate of src. NOTE(review): presumably maps boolean 1 to -1 - confirm.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
    match(Set dst (VectorLoadMask src));
    ins_cost(SVE_COST);
!   format %{ "sve_neg $dst, $src\t# vector load mask (B)" %}
-   ins_encode %{
-     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue, as_FloatRegister($src$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vloadmaskS(vReg dst, vReg src) %{
    // VectorLoadMask for short-element results (pre-change form): one
    // sve_uunpklo step to H (B to H per the format string), then an H-wise negate.
-   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
-   match(Set dst (VectorLoadMask src));
-   ins_cost(2 * SVE_COST);
-   format %{ "sve_uunpklo $dst, H, $src\n\t"
-             "sve_neg $dst, $dst\t# vector load mask (B to H)" %}
    ins_encode %{
!     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
!     __ sve_neg(as_FloatRegister($dst$$reg), __ H, ptrue, as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vloadmaskI(vReg dst, vReg src) %{
    // VectorLoadMask for int/float-element results (pre-change form): two
    // sve_uunpklo steps (to H, then to S) followed by an S-wise negate.
!   predicate(UseSVE > 0 &&
-             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
-              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set dst (VectorLoadMask src));
    ins_cost(3 * SVE_COST);
!   format %{ "sve_uunpklo $dst, H, $src\n\t"
-             "sve_uunpklo $dst, S, $dst\n\t"
-             "sve_neg $dst, $dst\t# vector load mask (B to S)" %}
-   ins_encode %{
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
-     __ sve_neg(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vloadmaskL(vReg dst, vReg src) %{
    // VectorLoadMask for long/double-element results (pre-change form): three
    // sve_uunpklo steps (H, S, D) followed by a D-wise negate.
-   predicate(UseSVE > 0 &&
-             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
-              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
-   match(Set dst (VectorLoadMask src));
-   ins_cost(4 * SVE_COST);
-   format %{ "sve_uunpklo $dst, H, $src\n\t"
-             "sve_uunpklo $dst, S, $dst\n\t"
-             "sve_uunpklo $dst, D, $dst\n\t"
-             "sve_neg $dst, $dst\t# vector load mask (B to D)" %}
    ins_encode %{
!     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
!     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
!     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
!     __ sve_neg(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector store mask
  
! instruct vstoremaskB(vReg dst, vReg src, immI_1 size) %{
    // VectorStoreMask with the immI_1 size operand (pre-change form): a
    // single byte-wise negate of src into dst.
    predicate(UseSVE > 0);
    match(Set dst (VectorStoreMask src size));
    ins_cost(SVE_COST);
!   format %{ "sve_neg $dst, $src\t# vector store mask (B)" %}
    ins_encode %{
!     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
-                as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vstoremaskS(vReg dst, vReg src, vReg tmp, immI_2 size) %{
    // VectorStoreMask with the immI_2 size operand (pre-change form): tmp is
    // zeroed, src is combined with it by one sve_uzp1 at B (H to B per the
    // format string), then the result is negated byte-wise.
    predicate(UseSVE > 0);
    match(Set dst (VectorStoreMask src size));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_dup $tmp, H, 0\n\t"
-             "sve_uzp1 $dst, B, $src, $tmp\n\t"
-             "sve_neg $dst, B, $dst\t# vector store mask (sve) (H to B)" %}
-   ins_encode %{
-     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
-                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
-                as_FloatRegister($dst$$reg));
- 
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vstoremaskI(vReg dst, vReg src, vReg tmp, immI_4 size) %{
    // VectorStoreMask with the immI_4 size operand (pre-change form): tmp is
    // zeroed, two sve_uzp1 steps (H, then B) combine the data with it, then
    // the result is negated byte-wise (S to B per the format string).
-   predicate(UseSVE > 0);
-   match(Set dst (VectorStoreMask src size));
-   effect(TEMP_DEF dst, TEMP tmp);
-   ins_cost(4 * SVE_COST);
-   format %{ "sve_dup $tmp, S, 0\n\t"
-             "sve_uzp1 $dst, H, $src, $tmp\n\t"
-             "sve_uzp1 $dst, B, $dst, $tmp\n\t"
-             "sve_neg $dst, B, $dst\t# vector store mask (sve) (S to B)" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
!                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
-                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
-                as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vstoremaskL(vReg dst, vReg src, vReg tmp, immI_8 size) %{
    // VectorStoreMask with the immI_8 size operand (pre-change form): tmp is
    // zeroed, three sve_uzp1 steps (S, H, B) combine the data with it, then
    // the result is negated byte-wise (D to B per the format string).
-   predicate(UseSVE > 0);
-   match(Set dst (VectorStoreMask src size));
-   effect(TEMP_DEF dst, TEMP tmp);
-   ins_cost(5 * SVE_COST);
-   format %{ "sve_dup $tmp, D, 0\n\t"
-             "sve_uzp1 $dst, S, $src, $tmp\n\t"
-             "sve_uzp1 $dst, H, $dst, $tmp\n\t"
-             "sve_uzp1 $dst, B, $dst, $tmp\n\t"
-             "sve_neg $dst, B, $dst\t# vector store mask (sve) (D to B)" %}
-   ins_encode %{
-     __ sve_dup(as_FloatRegister($tmp$$reg), __ D, 0);
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S,
-                 as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H,
-                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B,
-                 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_neg(as_FloatRegister($dst$$reg), __ B, ptrue,
-                as_FloatRegister($dst$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
  
! // load/store mask vector
! 
! instruct vloadmask_loadV_byte(vReg dst, vmemA mem) %{
    // Fuses LoadVector + VectorLoadMask for full-width vectors with 1-byte
    // elements (pre-change form): a predicated load of T_BOOLEAN data
    // followed by an in-place negate of dst.
!   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize &&
-             type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) == 1);
    match(Set dst (VectorLoadMask (LoadVector mem)));
!   ins_cost(5 * SVE_COST);
!   format %{ "sve_ld1b $dst, $mem\n\t"
!             "sve_neg $dst, $dst\t# load vector mask (sve)" %}
    ins_encode %{
!     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
      BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt);
      // The second argument (false) selects the load direction; the store rules pass true.
!     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
-                           T_BOOLEAN, to_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
!     __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vloadmask_loadV_non_byte(vReg dst, indirect mem) %{
    // Same fusion as vloadmask_loadV_byte but for element sizes above one
    // byte; the memory operand is restricted to the indirect form.
!   predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() == MaxVectorSize &&
              type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
    match(Set dst (VectorLoadMask (LoadVector mem)));
!   ins_cost(5 * SVE_COST);
!   format %{ "sve_ld1b $dst, $mem\n\t"
!             "sve_neg $dst, $dst\t# load vector mask (sve)" %}
    ins_encode %{
!     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
      BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant to_vect_variant = __ elemType_to_regVariant(to_vect_bt);
      // Memory data is T_BOOLEAN; the register element type is to_vect_bt.
!     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
!                           T_BOOLEAN, to_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
!     __ sve_neg(dst_reg, to_vect_variant, ptrue, dst_reg);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct storeV_vstoremask_byte(vmemA mem, vReg src, vReg tmp, immI_1 esize) %{
    // Fuses VectorStoreMask + StoreVector for 1-byte mask elements
    // (pre-change form). The predicate requires memory_size times the
    // VectorStoreMask size input to equal MaxVectorSize.
!   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() *
!                           n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize);
    match(Set mem (StoreVector mem (VectorStoreMask src esize)));
    effect(TEMP tmp);
!   ins_cost(5 * SVE_COST);
!   format %{ "sve_neg $tmp, $src\n\t"
!             "sve_st1b $tmp, $mem\t# store vector mask (sve)" %}
    ins_encode %{
      BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
      assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type.");
!     Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant);
      // Negate into tmp so src is left untouched, then store tmp as T_BOOLEAN data.
!     __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue,
-                as_FloatRegister($src$$reg));
      loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg),
                            ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct storeV_vstoremask_non_byte(indirect mem, vReg src, vReg tmp, immI_gt_1 esize) %{
    // Same fusion as storeV_vstoremask_byte but for the immI_gt_1 element
    // size operand; the memory operand is restricted to the indirect form.
!   predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() *
!                           n->as_StoreVector()->in(MemNode::ValueIn)->in(2)->get_int() == MaxVectorSize);
    match(Set mem (StoreVector mem (VectorStoreMask src esize)));
!   effect(TEMP tmp);
!   ins_cost(5 * SVE_COST);
!   format %{ "sve_neg $tmp, $src\n\t"
-             "sve_st1b $tmp, $mem\t# store vector mask (sve)" %}
    ins_encode %{
      BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
!     assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type.");
!     Assembler::SIMD_RegVariant from_vect_variant = __ elemBytes_to_regVariant($esize$$constant);
      // Negate into tmp so src is left untouched, then store tmp as T_BOOLEAN data.
!     __ sve_neg(as_FloatRegister($tmp$$reg), from_vect_variant, ptrue,
!                as_FloatRegister($src$$reg));
!     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg),
-                           ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector add reduction
  
! instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
    // Integer add reduction (pre-change form) for vectors whose byte length
    // equals MaxVectorSize: dst = src1 + reduction of src2, for byte, short
    // and int elements.
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (AddReductionVI src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_addI $dst, $src1, $src2\t# addB/S/I reduction (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // Reduce src2 into vtmp, move element 0 to dst, then add the scalar input.
!     __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
!     __ addw($dst$$Register, $dst$$Register, $src1$$Register);
      // Byte and short results are sign-extended from their element width.
!     if (bt == T_BYTE) {
!       __ sxtb($dst$$Register, $dst$$Register);
!     } else if (bt == T_SHORT) {
!       __ sxth($dst$$Register, $dst$$Register);
!     } else {
!       assert(bt == T_INT, "unsupported type");
!     }
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Integer add reduction (pre-change form) for vectors shorter than
    // MaxVectorSize: ptmp is built from src2's element count and governs the
    // reduction in place of ptrue. The flags register is declared killed.
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (AddReductionVI src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
    format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // NOTE(review): presumably enables only the first vector_length lanes - confirm.
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
!     __ sve_uaddv(as_FloatRegister($vtmp$$reg), variant,
!                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ addw($dst$$Register, $dst$$Register, $src1$$Register);
      // Byte and short results are sign-extended from their element width.
-     if (bt == T_BYTE) {
-       __ sxtb($dst$$Register, $dst$$Register);
-     } else if (bt == T_SHORT) {
-       __ sxth($dst$$Register, $dst$$Register);
-     } else {
-       assert(bt == T_INT, "unsupported type");
-     }
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
    // Long add reduction (pre-change form) for vectors whose byte length
    // equals MaxVectorSize: reduce src2 into vtmp, move element 0 to dst,
    // then add src1 with a 64-bit add.
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (AddReductionVL src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %}
    ins_encode %{
!     __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ add($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
    // Long add reduction (pre-change form) for vectors shorter than
    // MaxVectorSize: ptmp is built from src2's element count and governs the
    // reduction in place of ptrue. The flags register is declared killed.
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (AddReductionVL src1 src2));
-   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_uaddv(as_FloatRegister($vtmp$$reg), __ D,
                   as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
-     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
-     __ add($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
  
! instruct reduce_addF(vRegF src1_dst, vReg src2) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set src1_dst (AddReductionVF src1_dst src2));
    ins_cost(SVE_COST);
!   format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
    ins_encode %{
!     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
!          ptrue, as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addF_partial(vRegF src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set src1_dst (AddReductionVF src1_dst src2));
    ins_cost(SVE_COST);
!   effect(TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_addF $src1_dst, $src1_dst, $src2\t# addF reduction partial (sve) (S)" %}
    ins_encode %{
-     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
-                           Matcher::vector_length(this, $src2));
      __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
!                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addD(vRegD src1_dst, vReg src2) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set src1_dst (AddReductionVD src1_dst src2));
    ins_cost(SVE_COST);
!   format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
!          ptrue, as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addD_partial(vRegD src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set src1_dst (AddReductionVD src1_dst src2));
    ins_cost(SVE_COST);
    effect(TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_addD $src1_dst, $src1_dst, $src2\t# addD reduction partial (sve) (D)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
      __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
                   as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector and reduction
  
! instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (AndReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_andI $dst, $src1, $src2\t# andB/S/I reduction (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
!     __ sve_andv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
!     __ andw($dst$$Register, $dst$$Register, $src1$$Register);
!     if (bt == T_BYTE) {
!       __ sxtb($dst$$Register, $dst$$Register);
!     } else if (bt == T_SHORT) {
!       __ sxth($dst$$Register, $dst$$Register);
!     } else {
!       assert(bt == T_INT, "unsupported type");
!     }
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (AndReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
    format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
!     __ sve_andv(as_FloatRegister($vtmp$$reg), variant,
!                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ andw($dst$$Register, $dst$$Register, $src1$$Register);
-     if (bt == T_BYTE) {
-       __ sxtb($dst$$Register, $dst$$Register);
-     } else if (bt == T_SHORT) {
-       __ sxth($dst$$Register, $dst$$Register);
-     } else {
-       assert(bt == T_INT, "unsupported type");
-     }
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (AndReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %}
    ins_encode %{
!     __ sve_andv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ andr($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (AndReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_andv(as_FloatRegister($vtmp$$reg), __ D,
!                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ andr($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector or reduction
  
! instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (OrReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_orI $dst, $src1, $src2\t# orB/S/I reduction (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
!     __ sve_orv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
!     __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
!     if (bt == T_BYTE) {
!       __ sxtb($dst$$Register, $dst$$Register);
!     } else if (bt == T_SHORT) {
!       __ sxth($dst$$Register, $dst$$Register);
!     } else {
!       assert(bt == T_INT, "unsupported type");
!     }
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (OrReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
    format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
!     __ sve_orv(as_FloatRegister($vtmp$$reg), variant,
!                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ orrw($dst$$Register, $dst$$Register, $src1$$Register);
-     if (bt == T_BYTE) {
-       __ sxtb($dst$$Register, $dst$$Register);
-     } else if (bt == T_SHORT) {
-       __ sxth($dst$$Register, $dst$$Register);
-     } else {
-       assert(bt == T_INT, "unsupported type");
-     }
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (OrReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %}
    ins_encode %{
!     __ sve_orv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ orr($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (OrReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
!   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_orv(as_FloatRegister($vtmp$$reg), __ D,
!                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
-     __ orr($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector xor reduction
  
! instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set dst (XorReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorB/H/I reduction (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
!     __ sve_eorv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
-     if (bt == T_BYTE) {
-       __ sxtb($dst$$Register, $dst$$Register);
-     } else if (bt == T_SHORT) {
-       __ sxth($dst$$Register, $dst$$Register);
-     } else {
-       assert(bt == T_INT, "unsupported type");
-     }
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (XorReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_eorI $dst, $src1, $src2\t# xorI reduction partial (sve) (may extend)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
-                           Matcher::vector_length(this, $src2));
-     __ sve_eorv(as_FloatRegister($vtmp$$reg), variant,
-                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
-     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ eorw($dst$$Register, $dst$$Register, $src1$$Register);
-     if (bt == T_BYTE) {
-       __ sxtb($dst$$Register, $dst$$Register);
-     } else if (bt == T_SHORT) {
-       __ sxth($dst$$Register, $dst$$Register);
-     } else {
-       assert(bt == T_INT, "unsupported type");
-     }
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set dst (XorReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
!   ins_cost(SVE_COST);
!   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction (sve)" %}
    ins_encode %{
!     __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ eor($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (XorReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
!   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# xorL reduction partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_eorv(as_FloatRegister($vtmp$$reg), __ D,
!                 as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ eor($dst$$Register, $dst$$Register, $src1$$Register);
    %}
    ins_pipe(pipe_slow);
  %}
  
- 
  // vector max reduction
  
! instruct reduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
!             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
!              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
!              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
    match(Set dst (MaxReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxB/S/I (sve)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
!     __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ cmpw($dst$$Register, $src1$$Register);
-     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_maxI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
!             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
-              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
-              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
    match(Set dst (MaxReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxI $dst, $src1, $src2\t# reduce maxI partial (sve)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
-                           Matcher::vector_length(this, $src2));
-     __ sve_smaxv(as_FloatRegister($vtmp$$reg), variant,
-                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
-     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ cmpw($dst$$Register, $src1$$Register);
-     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
!             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MaxReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
!   ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL partial (sve)" %}
    ins_encode %{
!     __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ cmp($dst$$Register, $src1$$Register);
!     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MaxReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxL $dst, $src1, $src2\t# reduce maxL partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_smaxv(as_FloatRegister($vtmp$$reg), __ D,
!                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
-     __ cmp($dst$$Register, $src1$$Register);
-     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::GT);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst);
!   format %{ "sve_fmaxv $dst, $src2 # vector (sve) (S)\n\t"
-             "fmaxs $dst, $dst, $src1\t# max reduction F" %}
    ins_encode %{
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
-          ptrue, as_FloatRegister($src2$$reg));
      __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxF_partial(vRegF dst, vRegF src1, vReg src2,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_maxF $dst, $src1, $src2\t# reduce max S partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
                            Matcher::vector_length(this, $src2));
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
-          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
      __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst);
!   format %{ "sve_fmaxv $dst, $src2 # vector (sve) (D)\n\t"
-             "fmaxs $dst, $dst, $src1\t# max reduction D" %}
    ins_encode %{
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
-          ptrue, as_FloatRegister($src2$$reg));
      __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxD_partial(vRegD dst, vRegD src1, vReg src2,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_maxD $dst, $src1, $src2\t# reduce max D partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
-          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
      __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector min reduction
  
! instruct reduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
!             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
!              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
!              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
!   match(Set dst (MinReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minB/S/I (sve)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
!     __ sve_sminv(as_FloatRegister($vtmp$$reg), variant, ptrue, as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
-     __ cmpw($dst$$Register, $src1$$Register);
-     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_minI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
!             (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
!              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
!              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
!   match(Set dst (MinReductionV src1 src2));
-   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_minI $dst, $src1, $src2\t# reduce minI partial (sve)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
!     __ sve_sminv(as_FloatRegister($vtmp$$reg), variant,
!                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ smov($dst$$Register, as_FloatRegister($vtmp$$reg), variant, 0);
!     __ cmpw($dst$$Register, $src1$$Register);
!     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MinReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %}
    ins_encode %{
!     __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
!     __ cmp($dst$$Register, $src1$$Register);
!     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_minL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MinReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
!   format %{ "sve_reduce_minL $dst, $src1, $src2\t# reduce minL partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_sminv(as_FloatRegister($vtmp$$reg), __ D,
!                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
!     __ umov($dst$$Register, as_FloatRegister($vtmp$$reg), __ D, 0);
-     __ cmp($dst$$Register, $src1$$Register);
-     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($src1$$reg), Assembler::LT);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MinReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst);
!   format %{ "sve_fminv $dst, $src2 # vector (sve) (S)\n\t"
-             "fmins $dst, $dst, $src1\t# min reduction F" %}
    ins_encode %{
!     __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
-          ptrue, as_FloatRegister($src2$$reg));
      __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_minF_partial(vRegF dst, vRegF src1, vReg src2,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (MinReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_minF $dst, $src1, $src2\t# reduce min S partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
                            Matcher::vector_length(this, $src2));
!     __ sve_fminv(as_FloatRegister($dst$$reg), __ S,
-          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
      __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MinReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst);
!   format %{ "sve_fminv $dst, $src2 # vector (sve) (D)\n\t"
-             "fmins $dst, $dst, $src1\t# min reduction D" %}
    ins_encode %{
!     __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
!          ptrue, as_FloatRegister($src2$$reg));
      __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_minD_partial(vRegD dst, vRegD src1, vReg src2,
!                              pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (MinReductionV src1 src2));
!   ins_cost(INSN_COST);
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_minD $dst, $src1, $src2\t# reduce min D partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
      __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
!          as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
      __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
--- 1715,1521 ---
    ins_pipe(pipe_slow);
  %}
  
  // vector mask compare
  
! instruct vmaskcmp(pRegGov dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
    predicate(UseSVE > 0);
    match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
!   effect(KILL cr);
!   ins_cost(SVE_COST);
!   format %{ "sve_cmp $dst, $src1, $src2\t# vector mask cmp (sve)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this);
!     __ sve_compare(as_PRegister($dst$$reg), bt, ptrue, as_FloatRegister($src1$$reg),
                     as_FloatRegister($src2$$reg), (int)$cond$$constant);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmaskcmp_masked(pRegGov dst, vReg src1, vReg src2, immI cond, pRegGov pg, rFlagsReg cr) %{
    // Predicated vector compare: same as the unpredicated rule except that the
    // match tree carries the mask operand pg next to the condition, and pg
    // (instead of ptrue) is handed to sve_compare as the predicate argument.
    predicate(UseSVE > 0);
!   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond pg)));
!   effect(KILL cr);
!   ins_cost(SVE_COST);
!   format %{ "sve_cmp $dst, $pg, $src1, $src2\t# vector mask cmp (sve)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this);
!     __ sve_compare(as_PRegister($dst$$reg), bt, as_PRegister($pg$$reg), as_FloatRegister($src1$$reg),
!                    as_FloatRegister($src2$$reg), (int)$cond$$constant);
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector blend
  
! instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{
    // Vector blend: matches VectorBlend on (src1, src2) with predicate pg and
    // emits a single sve_sel into dst.
    predicate(UseSVE > 0);
!   match(Set dst (VectorBlend (Binary src1 src2) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_sel $dst, $pg, $src2, $src1\t# vector blend (sve)" %}
    ins_encode %{
!     Assembler::SIMD_RegVariant size =
!                __ elemType_to_regVariant(Matcher::vector_element_basic_type(this));
      // Note the operand order: src2 is passed before src1 to sve_sel.
!     __ sve_sel(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg),
!                as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector load mask
  
! instruct vloadmaskB(pRegGov dst, vReg src, rFlagsReg cr) %{
    // VectorLoadMask for byte elements: the vector src is turned into the
    // predicate dst by comparing each byte lane against 0 with condition NE.
    // Selected only when the result's element type is T_BYTE.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
    match(Set dst (VectorLoadMask src));
+   effect(KILL cr);
    ins_cost(SVE_COST);
!   format %{ "vloadmaskB $dst, $src\t# vector load mask (sve) (B)" %}
    ins_encode %{
!     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), __ B,
!                ptrue, as_FloatRegister($src$$reg), 0);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{
    // VectorLoadMask for non-byte elements: src is first extended from byte
    // lanes to the node's element size into tmp, then tmp is compared against 0
    // (NE) to produce the predicate dst.
!   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() != T_BYTE);
    match(Set dst (VectorLoadMask src));
+   effect(TEMP tmp, KILL cr);
    ins_cost(3 * SVE_COST);
!   format %{ "vloadmask $dst, $src\t# vector load mask (sve) (H/S/D)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
!     __ sve_vector_extend(as_FloatRegister($tmp$$reg), size, as_FloatRegister($src$$reg), __ B);
!     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector store mask
  
! instruct vstoremaskB(vReg dst, pRegGov src, immI_1 size) %{
    // VectorStoreMask with element size 1 (immI_1): the predicate src is
    // materialized into the vector dst by a predicated copy of the immediate 1
    // at byte granularity.
    predicate(UseSVE > 0);
    match(Set dst (VectorStoreMask src size));
    ins_cost(SVE_COST);
!   format %{ "vstoremask $dst, $src\t# vector store mask (sve) (B)" %}
    ins_encode %{
!     __ sve_cpy(as_FloatRegister($dst$$reg), __ B, as_PRegister($src$$reg), 1, false);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vstoremask_narrow(vReg dst, pRegGov src, vReg tmp, immI_gt_1 size) %{
    // VectorStoreMask with element size > 1 (immI_gt_1): the predicate src is
    // first copied into dst as 1s at the given element size, then dst is
    // narrowed in place to byte lanes, using tmp as scratch.
    predicate(UseSVE > 0);
    match(Set dst (VectorStoreMask src size));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "vstoremask $dst, $src\t# vector store mask (sve) (H/S/D)" %}
    ins_encode %{
      // The register variant is derived from the element size in bytes.
!     Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant((int)$size$$constant);
!     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($src$$reg), 1, false);
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B,
!                          as_FloatRegister($dst$$reg), size, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // Combine LoadVector+VectorLoadMask when the vector element type is not T_BYTE
  
! instruct vloadmask_loadV(pRegGov dst, indirect mem, vReg tmp, rFlagsReg cr) %{
    // Fused LoadVector + VectorLoadMask for a full-size vector (length in bytes
    // equals MaxVectorSize) whose element size is larger than one byte.
!   predicate(UseSVE > 0 &&
!             n->as_Vector()->length_in_bytes() == MaxVectorSize &&
!             type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
    match(Set dst (VectorLoadMask (LoadVector mem)));
!   effect(TEMP tmp, KILL cr);
!   ins_cost(3 * SVE_COST);
!   format %{ "sve_ld1b $tmp, $mem\n\t"
+             "sve_cmpne $dst, $tmp, 0\t# load vector mask (sve) (H/S/D)" %}
    ins_encode %{
!     // Load mask values which are boolean type, and extend them to the
+     // expected vector element type. Convert the vector to predicate.
      BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
      // Second argument 'false' selects the load direction (the store rules
      // below pass 'true'); memory type is T_BOOLEAN, vector type to_vect_bt.
!     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($tmp$$reg),
!                           ptrue, T_BOOLEAN, to_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
!     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), __ elemType_to_regVariant(to_vect_bt),
+                ptrue, as_FloatRegister($tmp$$reg), 0);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vloadmask_loadV_partial(pRegGov dst, indirect mem, vReg vtmp, pRegGov ptmp, rFlagsReg cr) %{
    // Fused LoadVector + VectorLoadMask for a partial vector: length in bytes is
    // above 16 but below MaxVectorSize, and the element size is larger than one
    // byte. The load is governed by ptmp, which is set up from the lane count.
!   predicate(UseSVE > 0 &&
+             n->as_Vector()->length_in_bytes() > 16 &&
+             n->as_Vector()->length_in_bytes() < MaxVectorSize &&
              type2aelembytes(n->bottom_type()->is_vect()->element_basic_type()) > 1);
    match(Set dst (VectorLoadMask (LoadVector mem)));
!   effect(TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(6 * SVE_COST);
!   format %{ "vloadmask_loadV $dst, $mem\t# load vector mask partial (sve) (H/S/D)" %}
    ins_encode %{
!     // Load valid mask values which are boolean type, and extend them to the
+     // expected vector element type. Convert the vector to predicate.
      BasicType to_vect_bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(to_vect_bt);
      // ptmp is initialized from this node's vector length (in lanes).
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this));
!     loadStoreA_predicated(C2_MacroAssembler(&cbuf), false, as_FloatRegister($vtmp$$reg),
+                           as_PRegister($ptmp$$reg), T_BOOLEAN, to_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
      // The final compare uses ptrue rather than ptmp.
      // NOTE(review): presumably safe because the predicated load leaves the
      // inactive lanes of vtmp zero - confirm against loadStoreA_predicated.
!     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($vtmp$$reg), 0);
    %}
    ins_pipe(pipe_slow);
  %}
  
! // Combine VectorStoreMask+StoreVector when the vector element type is not T_BYTE
! 
! instruct storeV_vstoremask(indirect mem, pRegGov src, vReg tmp, immI_gt_1 esize) %{
    // Fused VectorStoreMask + StoreVector for element sizes > 1 (immI_gt_1).
    // Selected when the mask input of the VectorStoreMask (the stored value's
    // in(1)) has a vector length in bytes equal to MaxVectorSize.
+   predicate(UseSVE > 0 &&
+             Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) == MaxVectorSize);
    match(Set mem (StoreVector mem (VectorStoreMask src esize)));
    effect(TEMP tmp);
!   ins_cost(3 * SVE_COST);
!   format %{ "sve_cpy $tmp, $src, 1\n\t"
!             "sve_st1b $tmp, $mem\t# store vector mask (sve) (H/S/D)" %}
    ins_encode %{
      BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
      assert(type2aelembytes(from_vect_bt) == (int)$esize$$constant, "unsupported type.");
!     Assembler::SIMD_RegVariant size = __ elemBytes_to_regVariant($esize$$constant);
      // Materialize the predicate as 1s in tmp, then store tmp with memory
      // element type T_BOOLEAN ('true' selects the store direction).
!     __ sve_cpy(as_FloatRegister($tmp$$reg), size, as_PRegister($src$$reg), 1, false);
      loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($tmp$$reg),
                            ptrue, T_BOOLEAN, from_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct storeV_vstoremask_partial(indirect mem, pRegGov src, vReg vtmp,
!                                    immI_gt_1 esize, pRegGov ptmp, rFlagsReg cr) %{
    // Partial variant of the fused VectorStoreMask + StoreVector rule: selected
    // when the store's memory size is above 16 bytes, the stored vector's
    // element size is above one byte, and the mask input's length in bytes is
    // below MaxVectorSize. The store is governed by ptmp.
!   predicate(UseSVE > 0 &&
+             n->as_StoreVector()->memory_size() > 16 &&
+             type2aelembytes(n->as_StoreVector()->vect_type()->element_basic_type()) > 1 &&
+             Matcher::vector_length_in_bytes(n->as_StoreVector()->in(MemNode::ValueIn)->in(1)) < MaxVectorSize);
    match(Set mem (StoreVector mem (VectorStoreMask src esize)));
!   effect(TEMP vtmp, TEMP ptmp, KILL cr);
!   format %{ "storeV_vstoremask $src, $mem\t# store vector mask partial (sve) (H/S/D)" %}
!   ins_cost(6 * SVE_COST);
    ins_encode %{
+     // Convert the valid src predicate to vector, and store the vector
+     // elements as boolean values.
      BasicType from_vect_bt = Matcher::vector_element_basic_type(this, $src);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(from_vect_bt);
!     __ sve_cpy(as_FloatRegister($vtmp$$reg), size, as_PRegister($src$$reg), 1, false);
      // ptmp is initialized from the lane count of the src mask operand.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src));
!     loadStoreA_predicated(C2_MacroAssembler(&cbuf), true, as_FloatRegister($vtmp$$reg),
!                           as_PRegister($ptmp$$reg), T_BOOLEAN, from_vect_bt, $mem->opcode(),
                            as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector add reduction
  
! instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
    // Full-vector AddReductionVI: selected when the vector input (n->in(2))
    // is exactly MaxVectorSize bytes long. The scalar src1 and vector src2 are
    // handed to sve_reduce_integral with ptrue as predicate and tmp as scratch.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (AddReductionVI src1 src2));
!   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction (sve) (may extend)" %}
    ins_encode %{
      // The element type is read from the vector operand src2.
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            ptrue, as_FloatRegister($tmp$$reg));
!   %}
!   ins_pipe(pipe_slow);
! %}
! 
! instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
    // Full-vector AddReductionVL: selected when the vector input (n->in(2))
    // is exactly MaxVectorSize bytes long. The element type is fixed to T_LONG
    // and ptrue is passed as the predicate.
!   predicate(UseSVE > 0 &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set dst (AddReductionVL src1 src2));
+   effect(TEMP_DEF dst, TEMP tmp);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction (sve)" %}
+   ins_encode %{
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            ptrue, as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_addF(vRegF src1_dst, vReg src2) %{
    // Full-vector AddReductionVF: src1_dst is both the scalar input and the
    // result register; a single sve_fadda at S granularity with ptrue is emitted.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set src1_dst (AddReductionVF src1_dst src2));
+   ins_cost(SVE_COST);
+   format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
+          ptrue, as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_addD(vRegD src1_dst, vReg src2) %{
    // Full-vector AddReductionVD: src1_dst is both the scalar input and the
    // result register; a single sve_fadda at D granularity with ptrue is emitted.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set src1_dst (AddReductionVD src1_dst src2));
+   ins_cost(SVE_COST);
+   format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
+          ptrue, as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_addI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Partial AddReductionVI: selected when the vector input (n->in(2)) is
    // shorter than MaxVectorSize. ptmp is set up from src2's lane count and
    // replaces ptrue as the predicate given to sve_reduce_integral.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (AddReductionVI src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(2 * SVE_COST);
    format %{ "sve_reduce_addI $dst, $src1, $src2\t# addI reduction partial (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
    // Partial AddReductionVL: selected when the vector input (n->in(2)) is
    // shorter than MaxVectorSize. ptmp is set up at D granularity from src2's
    // lane count and used as the predicate for the T_LONG reduction.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (AddReductionVL src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_reduce_addL $dst, $src1, $src2\t# addL reduction partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src2));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_addF_partial(vRegF src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
    // Partial AddReductionVF: selected when the vector input (n->in(2)) is
    // shorter than MaxVectorSize. ptmp is set up at S granularity from src2's
    // lane count and used as the predicate for sve_fadda.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set src1_dst (AddReductionVF src1_dst src2));
    ins_cost(SVE_COST);
!   effect(TEMP ptmp, KILL cr);
+   format %{ "sve_reduce_addF $src1_dst, $src1_dst, $src2\t# addF reduction partial (sve) (S)" %}
    ins_encode %{
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
!                           Matcher::vector_length(this, $src2));
!     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
+                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addD_partial(vRegD src1_dst, vReg src2, pRegGov ptmp, rFlagsReg cr) %{
    // Partial AddReductionVD: selected when the vector input (n->in(2)) is
    // shorter than MaxVectorSize. ptmp is set up at D granularity from src2's
    // lane count and used as the predicate for sve_fadda.
!   predicate(UseSVE > 0 &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set src1_dst (AddReductionVD src1_dst src2));
    ins_cost(SVE_COST);
!   effect(TEMP ptmp, KILL cr);
+   format %{ "sve_reduce_addD $src1_dst, $src1_dst, $src2\t# addD reduction partial (sve) (D)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
                   as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector add reduction - predicated
  
! instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Predicated full-vector AddReductionVI. In this match shape src1/src2 sit
    // under a Binary node, so the vector input is reached via n->in(1)->in(2);
    // the mask pg is passed directly as the predicate for the reduction.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (AddReductionVI (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_addI $dst, $src1, $pg, $src2\t# addI reduction predicated (sve) (may extend)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Predicated full-vector AddReductionVL. The vector input is reached via
    // n->in(1)->in(2); pg is passed as the predicate and the element type is
    // fixed to T_LONG.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (AddReductionVL (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_addL $dst, $src1, $pg, $src2\t# addL reduction predicated (sve)" %}
!   ins_encode %{
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_addF_masked(vRegF src1_dst, vReg src2, pRegGov pg) %{
    // Predicated full-vector AddReductionVF: sve_fadda at S granularity with
    // the mask pg as predicate; src1_dst is both input and result.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set src1_dst (AddReductionVF (Binary src1_dst src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_addF $src1_dst, $pg, $src2\t# addF reduction predicated (sve)" %}
    ins_encode %{
      __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
!                  as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addD_masked(vRegD src1_dst, vReg src2, pRegGov pg) %{
    // Predicated full-vector AddReductionVD: sve_fadda at D granularity with
    // the mask pg as predicate; src1_dst is both input and result.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set src1_dst (AddReductionVD (Binary src1_dst src2) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_addD $src1_dst, $pg, $src2\t# addD reduction predicated (sve)" %}
    ins_encode %{
      __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
!                  as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_addI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
!                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Predicated partial AddReductionVI: selected when the vector input
    // (n->in(1)->in(2)) is shorter than MaxVectorSize. ptmp is set up from
    // src2's lane count, combined with the mask pg via sve_and, and the result
    // is used as the predicate for the reduction.
!   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (AddReductionVI (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_addI $dst, $src1, $pg, $src2\t# addI reduction predicated partial (sve) (may extend)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                           Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_addL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Predicated partial AddReductionVL: selected when the vector input
    // (n->in(1)->in(2)) is shorter than MaxVectorSize. ptmp is set up at D
    // granularity from src2's lane count, combined with pg via sve_and, and
    // used as the predicate for the T_LONG reduction.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (AddReductionVL (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_addL $dst, $src1, $pg, $src2\t# addL reduction predicated partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_addF_masked_partial(vRegF src1_dst, vReg src2, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Predicated partial AddReductionVF: ptmp is set up at S granularity from
    // src2's lane count, combined with the mask pg via sve_and, and used as
    // the predicate for sve_fadda.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set src1_dst (AddReductionVF (Binary src1_dst src2) pg));
+   effect(TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
+   format %{ "sve_reduce_addF $src1_dst, $pg, $src2\t# addF reduction predicated partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
+                           Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
+                  as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_addD_masked_partial(vRegD src1_dst, vReg src2, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Predicated partial AddReductionVD: ptmp is set up at D granularity from
    // src2's lane count, combined with the mask pg via sve_and, and used as
    // the predicate for sve_fadda.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set src1_dst (AddReductionVD (Binary src1_dst src2) pg));
    effect(TEMP ptmp, KILL cr);
!   ins_cost(SVE_COST);
+   format %{ "sve_reduce_addD $src1_dst, $pg, $src2\t# addD reduction predicated partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
      __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
                   as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector and reduction
  
! instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
    // Full-vector AndReductionV for non-long elements. AndReductionV is shared
    // with the long rule, so the predicate separates them by element type
    // (!= T_LONG here) in addition to requiring a MaxVectorSize-byte vector.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (AndReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            ptrue, as_FloatRegister($tmp$$reg));
!   %}
!   ins_pipe(pipe_slow);
! %}
! 
! instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
    // Full-vector AndReductionV for T_LONG elements (the predicate requires
    // element type == T_LONG and a MaxVectorSize-byte vector); ptrue is passed
    // as the predicate.
!   predicate(UseSVE > 0 &&
!             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (AndReductionV src1 src2));
+   effect(TEMP_DEF dst, TEMP tmp);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction (sve)" %}
+   ins_encode %{
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            ptrue, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_andI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Partial AndReductionV for non-long elements: selected when the vector
    // input is shorter than MaxVectorSize. ptmp is set up from src2's lane
    // count and used as the predicate for the reduction.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (AndReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(2 * SVE_COST);
    format %{ "sve_reduce_andI $dst, $src1, $src2\t# andI reduction partial (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_andL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
    // Partial AndReductionV for T_LONG elements: selected when the vector
    // input is shorter than MaxVectorSize. ptmp is set up at D granularity
    // from src2's lane count and used as the predicate for the reduction.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (AndReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_reduce_andL $dst, $src1, $src2\t# andL reduction partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src2));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector and reduction - predicated
+ 
+ instruct reduce_andI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Predicated full-vector AndReductionV for non-long elements. The vector
    // input is reached via n->in(1)->in(2) because src1/src2 sit under a
    // Binary node; the mask pg is passed as the predicate for the reduction.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (AndReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_andI $dst, $src1, $pg, $src2\t# andI reduction predicated (sve) (may extend)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_andL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Predicated full-vector AndReductionV for T_LONG elements; the mask pg is
    // passed as the predicate and the element type is fixed to T_LONG.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set dst (AndReductionV (Binary src1 src2) pg));
!   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_andL $dst, $src1, $pg, $src2\t# andL reduction predicated (sve)" %}
+   ins_encode %{
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_andI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Predicated partial AndReductionV for non-long elements: ptmp is set up
    // from src2's lane count, combined with the mask pg via sve_and, and the
    // result is used as the predicate for the reduction.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (AndReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_andI $dst, $src1, $pg, $src2\t# andI reduction predicated partial (sve) (may extend)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                           Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_andL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Predicated partial AndReductionV for T_LONG elements: ptmp is set up at
    // D granularity from src2's lane count, combined with the mask pg via
    // sve_and, and used as the predicate for the reduction.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (AndReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_andL $dst, $src1, $pg, $src2\t# andL reduction predicated partial (sve)" %}
    ins_encode %{
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
!                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector or reduction
  
! instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
    // Full-vector OrReductionV for non-long elements. OrReductionV is shared
    // with the long rule, so the predicate separates them by element type
    // (!= T_LONG here) in addition to requiring a MaxVectorSize-byte vector.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (OrReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            ptrue, as_FloatRegister($tmp$$reg));
!   %}
!   ins_pipe(pipe_slow);
! %}
! 
! instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
    // Full-vector OrReductionV for T_LONG elements (the predicate requires
    // element type == T_LONG and a MaxVectorSize-byte vector); ptrue is passed
    // as the predicate.
!   predicate(UseSVE > 0 &&
!             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (OrReductionV src1 src2));
+   effect(TEMP_DEF dst, TEMP tmp);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction (sve)" %}
+   ins_encode %{
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            ptrue, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_orI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Partial OrReductionV for non-long elements: selected when the vector
    // input is shorter than MaxVectorSize. ptmp is set up from src2's lane
    // count and used as the predicate for the reduction.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (OrReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(2 * SVE_COST);
    format %{ "sve_reduce_orI $dst, $src1, $src2\t# orI reduction partial (sve) (may extend)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_orL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
    // Partial OrReductionV for T_LONG elements: selected when the vector
    // input is shorter than MaxVectorSize. ptmp is set up at D granularity
    // from src2's lane count and used as the predicate for the reduction.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (OrReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_reduce_orL $dst, $src1, $src2\t# orL reduction partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src2));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector or reduction - predicated
+ 
+ instruct reduce_orI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Masked OR reduction for non-long element types. The match rule wraps the
    // inputs as (Binary src1 src2), so the vector is n->in(1)->in(2) and pg is
    // the mask. Applies when the vector spans exactly MaxVectorSize bytes.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (OrReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_orI $dst, $src1, $pg, $src2\t# orI reduction predicated (sve) (may extend)" %}
    ins_encode %{
      // The mask register pg is passed directly as the governing predicate.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_orL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Masked OR reduction for T_LONG elements. The match rule wraps the inputs
    // as (Binary src1 src2), so the vector is n->in(1)->in(2) and pg is the
    // mask. Applies when the vector spans exactly MaxVectorSize bytes.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (OrReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP tmp);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_orL $dst, $src1, $pg, $src2\t# orL reduction predicated (sve)" %}
+   ins_encode %{
      // The mask register pg is passed directly as the governing predicate.
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_orI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Masked OR reduction for non-long element types on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (OrReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_orI $dst, $src1, $pg, $src2\t# orI reduction predicated partial (sve) (may extend)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // ptmp is generated from the lane count of src2 ...
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                           Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_orL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Masked OR reduction for T_LONG elements on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (OrReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_orL $dst, $src1, $pg, $src2\t# orL reduction predicated partial (sve)" %}
+   ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2 ...
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector xor reduction
+ 
+ instruct reduce_eorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
    // XOR reduction for non-long element types, selected when the vector input
    // (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (XorReductionV src1 src2));
+   effect(TEMP_DEF dst, TEMP tmp);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_eorI $dst, $src1, $src2\t# eorI reduction (sve) (may extend)" %}
+   ins_encode %{
      // Full-width vector: the reduction is governed by ptrue.
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            ptrue, as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_eorL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
    // XOR reduction for T_LONG elements, selected when the vector input
    // (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (XorReductionV src1 src2));
+   effect(TEMP_DEF dst, TEMP tmp);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# eorL reduction (sve)" %}
+   ins_encode %{
      // Full-width vector: the reduction is governed by ptrue.
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            ptrue, as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_eorI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
    // XOR reduction for non-long element types, selected when the vector input
    // (n->in(2), i.e. src2) is shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (XorReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_reduce_eorI $dst, $src1, $src2\t# eorI reduction partial (sve) (may extend)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // ptmp is generated from the lane count of src2 and then governs the
      // reduction in place of ptrue.
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                           Matcher::vector_length(this, $src2));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_eorL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                              pRegGov ptmp, rFlagsReg cr) %{
    // XOR reduction for T_LONG elements, selected when the vector input
    // (n->in(2), i.e. src2) is shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (XorReductionV src1 src2));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_reduce_eorL $dst, $src1, $src2\t# eorL reduction partial (sve)" %}
    ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2
      // and then governs the reduction in place of ptrue.
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector xor reduction - predicated
  
! instruct reduce_eorI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Masked XOR reduction for non-long element types. The match rule wraps the
    // inputs as (Binary src1 src2), so the vector is n->in(1)->in(2) and pg is
    // the mask. Applies when the vector spans exactly MaxVectorSize bytes.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set dst (XorReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_eorI $dst, $src1, $pg, $src2\t# eorI reduction predicated (sve) (may extend)" %}
    ins_encode %{
      // The mask register pg is passed directly as the governing predicate.
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_eorL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, pRegGov pg) %{
    // Masked XOR reduction for T_LONG elements. The match rule wraps the inputs
    // as (Binary src1 src2), so the vector is n->in(1)->in(2) and pg is the
    // mask. Applies when the vector spans exactly MaxVectorSize bytes.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
!   match(Set dst (XorReductionV (Binary src1 src2) pg));
!   effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_eorL $dst, $src1, $pg, $src2\t# eorL reduction predicated (sve)" %}
    ins_encode %{
      // The mask register pg is passed directly as the governing predicate.
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_eorI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
!                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Masked XOR reduction for non-long element types on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (XorReductionV (Binary src1 src2) pg));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_eorI $dst, $src1, $pg, $src2\t# eorI reduction predicated partial (sve) (may extend)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // ptmp is generated from the lane count of src2 ...
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                           Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_eorL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
!                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Masked XOR reduction for T_LONG elements on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (XorReductionV (Binary src1 src2) pg));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(3 * SVE_COST);
!   format %{ "sve_reduce_eorL $dst, $src1, $pg, $src2\t# eorL reduction predicated partial (sve)" %}
    ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2 ...
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
!     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
!                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector max reduction
  
! instruct reduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
    // Max reduction for integral, non-long element types, selected when the
    // vector input (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
    // dst is TEMP_DEF, tmp is scratch, cr is declared killed.
!   predicate(UseSVE > 0 &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
!             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
!             is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
    match(Set dst (MaxReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxI $dst, $src1, $src2\t# maxI reduction (sve)" %}
    ins_encode %{
      // Full-width vector: the reduction is governed by ptrue.
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            ptrue, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
    // Max reduction for T_LONG elements, selected when the vector input
    // (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
    // dst is TEMP_DEF, tmp is scratch, cr is declared killed.
!   predicate(UseSVE > 0 &&
!             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
!             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MaxReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxL $dst, $src1, $src2\t# maxL reduction (sve)" %}
    ins_encode %{
      // Full-width vector: the reduction is governed by ptrue.
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            ptrue, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_maxI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
!                              pRegGov ptmp, rFlagsReg cr) %{
    // Max reduction for integral, non-long element types, selected when the
    // vector input (n->in(2), i.e. src2) is shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
    match(Set dst (MaxReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_reduce_maxI $dst, $src1, $src2\t# maxI reduction partial (sve)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // ptmp is generated from the lane count of src2 and then governs the
      // reduction in place of ptrue.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
!                           Matcher::vector_length(this, $src2));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Max reduction for T_LONG elements, selected when the vector input
    // (n->in(2), i.e. src2) is shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MaxReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_reduce_maxL $dst, $src1, $src2\t# maxL reduction  partial (sve)" %}
    ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2
      // and then governs the reduction in place of ptrue.
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxF(vRegF dst, vRegF src1, vReg src2) %{
    // Max reduction for T_FLOAT elements, selected when the vector input
    // (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    // dst is written by sve_fmaxv before src1 is read by fmaxs, hence TEMP_DEF.
    effect(TEMP_DEF dst);
!   format %{ "sve_reduce_maxF $dst, $src1, $src2\t# maxF reduction (sve)" %}
    ins_encode %{
      // Step 1: reduce src2 into dst under ptrue; step 2: combine with src1.
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src2$$reg));
      __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxF_partial(vRegF dst, vRegF src1, vReg src2,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Max reduction for T_FLOAT elements, selected when the vector input
    // (n->in(2), i.e. src2) is shorter than MaxVectorSize.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    // dst is written before src1 is read; ptmp is scratch; cr is declared killed.
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_maxF $dst, $src1, $src2\t# maxF reduction partial (sve)" %}
    ins_encode %{
      // ptmp is generated with the S lane size from the lane count of src2
      // and governs sve_fmaxv; fmaxs then combines the result with src1.
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
                            Matcher::vector_length(this, $src2));
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
      __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxD(vRegD dst, vRegD src1, vReg src2) %{
    // Max reduction for T_DOUBLE elements, selected when the vector input
    // (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    // dst is written by sve_fmaxv before src1 is read by fmaxd, hence TEMP_DEF.
    effect(TEMP_DEF dst);
!   format %{ "sve_reduce_maxD $dst, $src1, $src2\t# maxD reduction (sve)" %}
    ins_encode %{
      // Step 1: reduce src2 into dst under ptrue; step 2: combine with src1.
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));
      __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_maxD_partial(vRegD dst, vRegD src1, vReg src2,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Max reduction for T_DOUBLE elements, selected when the vector input
    // (n->in(2), i.e. src2) is shorter than MaxVectorSize.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (MaxReductionV src1 src2));
    ins_cost(INSN_COST);
    // dst is written before src1 is read; ptmp is scratch; cr is declared killed.
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_maxD $dst, $src1, $src2\t# maxD reduction partial (sve)" %}
    ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2
      // and governs sve_fmaxv; fmaxd then combines the result with src1.
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
      __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector max reduction - predicated
  
! instruct reduce_maxI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp,
!                            pRegGov pg, rFlagsReg cr) %{
    // Masked max reduction for integral, non-long element types. The match rule
    // wraps the inputs as (Binary src1 src2), so the vector is n->in(1)->in(2)
    // and pg is the mask. Applies when the vector spans exactly MaxVectorSize.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
!             is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
!   match(Set dst (MaxReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxI $dst, $src1, $pg, $src2\t# maxI reduction predicated (sve)" %}
    ins_encode %{
      // The mask register pg is passed directly as the governing predicate.
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_maxL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp,
!                           pRegGov pg, rFlagsReg cr) %{
    // Masked max reduction for T_LONG elements. The match rule wraps the inputs
    // as (Binary src1 src2), so the vector is n->in(1)->in(2) and pg is the
    // mask. Applies when the vector spans exactly MaxVectorSize bytes.
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
!   match(Set dst (MaxReductionV (Binary src1 src2) pg));
!   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_maxL $dst, $src1, $pg, $src2\t# maxL reduction predicated (sve)" %}
+   ins_encode %{
      // The mask register pg is passed directly as the governing predicate.
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_maxI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Masked max reduction for integral, non-long element types on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
+   match(Set dst (MaxReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_maxI $dst, $src1, $pg, $src2\t# maxI reduction predicated partial (sve)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src2);
      Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // ptmp is generated from the lane count of src2 ...
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
                            Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
!     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
!                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_maxL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // Masked max reduction for T_LONG elements on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+   match(Set dst (MaxReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_maxL $dst, $src1, $pg, $src2\t# maxL reduction predicated partial (sve)" %}
+   ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2 ...
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_maxF_masked(vRegF dst, vRegF src1, vReg src2, pRegGov pg) %{
+   // Masked max reduction for T_FLOAT elements. The match rule wraps the inputs
+   // as (Binary src1 src2), so the vector is n->in(1)->in(2) and pg is the
+   // mask. Applies when the vector spans exactly MaxVectorSize bytes.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (MaxReductionV (Binary src1 src2) pg));
+   // dst is written by sve_fmaxv before src1 is read by fmaxs, so dst must not
+   // share a register with src1: declare it TEMP_DEF, as reduce_maxF and
+   // reduce_maxF_masked_partial already do.
+   effect(TEMP_DEF dst);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_maxF $dst, $src1, $pg, $src2\t# maxF reduction predicated (sve)" %}
+   ins_encode %{
+     // Step 1: reduce src2 into dst under the mask pg; step 2: combine with src1.
+     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+     __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_maxD_masked(vRegD dst, vRegD src1, vReg src2, pRegGov pg) %{
+   // Masked max reduction for T_DOUBLE elements. The match rule wraps the inputs
+   // as (Binary src1 src2), so the vector is n->in(1)->in(2) and pg is the
+   // mask. Applies when the vector spans exactly MaxVectorSize bytes.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (MaxReductionV (Binary src1 src2) pg));
+   // dst is written by sve_fmaxv before src1 is read by fmaxd, so dst must not
+   // share a register with src1: declare it TEMP_DEF, as reduce_maxD and
+   // reduce_maxD_masked_partial already do.
+   effect(TEMP_DEF dst);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_maxD $dst, $src1, $pg, $src2\t# maxD reduction predicated (sve)" %}
+   ins_encode %{
+     // Step 1: reduce src2 into dst under the mask pg; step 2: combine with src1.
+     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+     __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_maxF_masked_partial(vRegF dst, vRegF src1, vReg src2, pRegGov pg,
+                                     pRegGov ptmp, rFlagsReg cr) %{
    // Masked max reduction for T_FLOAT elements on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, ptmp is scratch, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (MaxReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_maxF $dst, $src1, $pg, $src2\t# maxF reduction predicated partial (sve)" %}
+   ins_encode %{
      // ptmp is generated with the S lane size from the lane count of src2 ...
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
+                           Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
      // Reduce src2 into dst under ptmp, then combine the result with src1.
+     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ S,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+     __ fmaxs(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_maxD_masked_partial(vRegD dst, vRegD src1, vReg src2, pRegGov pg,
+                                     pRegGov ptmp, rFlagsReg cr) %{
    // Masked max reduction for T_DOUBLE elements on a vector
    // (n->in(1)->in(2), i.e. src2) shorter than MaxVectorSize.
    // dst is TEMP_DEF, ptmp is scratch, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (MaxReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_maxD $dst, $src1, $pg, $src2\t# maxD reduction predicated partial (sve)" %}
+   ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2 ...
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src2));
      // ... then combined with the mask pg; pg itself is left unmodified.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
      // Reduce src2 into dst under ptmp, then combine the result with src1.
+     __ sve_fmaxv(as_FloatRegister($dst$$reg), __ D,
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+     __ fmaxd(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector min reduction
+ 
+ instruct reduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
    // Min reduction for integral, non-long element types, selected when the
    // vector input (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
    // dst is TEMP_DEF, tmp is scratch, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
+   match(Set dst (MinReductionV src1 src2));
+   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_minI $dst, $src1, $src2\t# minI reduction (sve)" %}
+   ins_encode %{
      // Full-width vector: the reduction is governed by ptrue.
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            ptrue, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp, rFlagsReg cr) %{
    // Min reduction for T_LONG elements, selected when the vector input
    // (n->in(2), i.e. src2) spans exactly MaxVectorSize bytes.
    // dst is TEMP_DEF, tmp is scratch, cr is declared killed.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MinReductionV src1 src2));
!   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_reduce_minL $dst, $src1, $src2\t# minL reduction (sve)" %}
    ins_encode %{
      // Full-width vector: the reduction is governed by ptrue.
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            ptrue, as_FloatRegister($tmp$$reg));
!   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minI_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                              pRegGov ptmp, rFlagsReg cr) %{
    // Min reduction for integral, non-long element types, selected when the
    // vector input (n->in(2), i.e. src2) is shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             is_integral_type(n->in(2)->bottom_type()->is_vect()->element_basic_type()));
+   match(Set dst (MinReductionV src1 src2));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(2 * SVE_COST);
+   format %{ "sve_reduce_minI $dst, $src1, $src2\t# minI reduction partial (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
      // ptmp is generated from the lane count of src2 and then governs the
      // reduction in place of ptrue.
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
+                           Matcher::vector_length(this, $src2));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct reduce_minL_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
                               pRegGov ptmp, rFlagsReg cr) %{
    // Min reduction for T_LONG elements, selected when the vector input
    // (n->in(2), i.e. src2) is shorter than MaxVectorSize.
    // dst is TEMP_DEF, vtmp/ptmp are scratch registers, cr is declared killed.
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (MinReductionV src1 src2));
    effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_reduce_minL $dst, $src1, $src2\t# minL reduction  partial (sve)" %}
    ins_encode %{
      // ptmp is generated with the D lane size from the lane count of src2
      // and then governs the reduction in place of ptrue.
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
!     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
!                            $src1$$Register, as_FloatRegister($src2$$reg),
!                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // MinReductionV for T_FLOAT lanes when the vector is exactly MaxVectorSize bytes.
  instruct reduce_minF(vRegF dst, vRegF src1, vReg src2) %{
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MinReductionV src1 src2));
    ins_cost(INSN_COST);
    // dst is written by fminv before src1 is read by fmins below.
    effect(TEMP_DEF dst);
!   format %{ "sve_reduce_minF $dst, $src1, $src2\t# minF reduction (sve)" %}
    ins_encode %{
!     __ sve_fminv(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src2$$reg));  // minimum across src2, governed by ptrue
      // Fold the scalar input src1 into the vector minimum.
      __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // MinReductionV for T_FLOAT lanes when the vector is shorter than MaxVectorSize.
  instruct reduce_minF_partial(vRegF dst, vRegF src1, vReg src2,
                               pRegGov ptmp, rFlagsReg cr) %{
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (MinReductionV src1 src2));
    ins_cost(INSN_COST);
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
!   format %{ "sve_reduce_minF $dst, $src1, $src2\t# minF reduction partial (sve)" %}
    ins_encode %{
      // ptmp is built from the lane count of src2 (presumably its first lanes active).
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
                            Matcher::vector_length(this, $src2));
!     __ sve_fminv(as_FloatRegister($dst$$reg), __ S, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));  // minimum across src2, governed by ptmp
      // Fold the scalar input src1 into the vector minimum.
      __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // MinReductionV for T_DOUBLE lanes when the vector is exactly MaxVectorSize bytes.
  instruct reduce_minD(vRegD dst, vRegD src1, vReg src2) %{
!   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
              n->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (MinReductionV src1 src2));
    ins_cost(INSN_COST);
    // dst is written by fminv before src1 is read by fmind below.
    effect(TEMP_DEF dst);
!   format %{ "sve_reduce_minD $dst, $src1, $src2\t# minD reduction (sve)" %}
    ins_encode %{
!     __ sve_fminv(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src2$$reg));  // minimum across src2, governed by ptrue
!     __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));  // fold in the scalar input src1
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minD_partial(vRegD dst, vRegD src1, vReg src2,
+                              pRegGov ptmp, rFlagsReg cr) %{  // MinReductionV: T_DOUBLE lanes, vector shorter than MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+             n->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (MinReductionV src1 src2));
+   ins_cost(INSN_COST);
+   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);
+   format %{ "sve_reduce_minD $dst, $src1, $src2\t# minD reduction partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,  // ptmp is built from the lane count of src2 (presumably its first lanes active)
+                           Matcher::vector_length(this, $src2));
+     __ sve_fminv(as_FloatRegister($dst$$reg), __ D, as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));  // minimum across src2, governed by ptmp
+     __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));  // fold in the scalar input src1
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector min reduction - predicated
+ 
+ instruct reduce_minI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp,
+                            pRegGov pg, rFlagsReg cr) %{  // masked MinReductionV: non-long integral lanes, vector of exactly MaxVectorSize bytes
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
+   match(Set dst (MinReductionV (Binary src1 src2) pg));  // src2 sits under the Binary node, hence in(1)->in(2) in the predicate
+   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_minI $dst, $src1, $pg, $src2\t# minI reduction predicated (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,  // the mask operand pg is used directly as the governing predicate
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp,
+                           pRegGov pg, rFlagsReg cr) %{  // masked MinReductionV: T_LONG lanes, vector of exactly MaxVectorSize bytes
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+   match(Set dst (MinReductionV (Binary src1 src2) pg));  // src2 sits under the Binary node, hence in(1)->in(2) in the predicate
+   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_minL $dst, $src1, $pg, $src2\t# minL reduction predicated (sve)" %}
+   ins_encode %{
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,  // the mask operand pg is used directly as the governing predicate
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($pg$$reg), as_FloatRegister($tmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minI_masked_partial(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{  // masked MinReductionV: non-long integral lanes, vector shorter than MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() != T_LONG &&
+             is_integral_type(n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type()));
+   match(Set dst (MinReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_minI $dst, $src1, $pg, $src2\t# minI reduction predicated partial (sve)" %}
+   ins_encode %{
+     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+     Assembler::SIMD_RegVariant variant = __ elemType_to_regVariant(bt);
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,  // ptmp is built from the lane count of src2
+                           Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),  // combine ptmp with the mask pg (presumably ptmp := ptmp AND pg)
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,  // reduction governed by the combined predicate; vtmp is scratch
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minL_masked_partial(iRegLNoSp dst, iRegL src1, vReg src2, vRegD vtmp,
+                                   pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{  // masked MinReductionV: T_LONG lanes, vector shorter than MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+   match(Set dst (MinReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP vtmp, TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_minL $dst, $src1, $pg, $src2\t# minL reduction predicated partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,  // ptmp is built from the lane count of src2
+                           Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),  // combine ptmp with the mask pg (presumably ptmp := ptmp AND pg)
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,  // reduction governed by the combined predicate; vtmp is scratch
+                            $src1$$Register, as_FloatRegister($src2$$reg),
+                            as_PRegister($ptmp$$reg), as_FloatRegister($vtmp$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minF_masked(vRegF dst, vRegF src1, vReg src2, pRegGov pg) %{
+   // Masked MinReductionV for T_FLOAT lanes when the vector is exactly
+   // MaxVectorSize bytes; the mask operand pg governs the reduction directly.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (MinReductionV (Binary src1 src2) pg));
+   // fminv writes dst before fmins reads src1, so dst must not be assigned the
+   // same register as an input; the unmasked reduce_minF declares the same effect.
+   effect(TEMP_DEF dst);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_minF $dst, $src1, $pg, $src2\t# minF reduction predicated (sve)" %}
+   ins_encode %{
+     // Minimum across src2 under pg, then fold in the scalar input src1.
+     __ sve_fminv(as_FloatRegister($dst$$reg), __ S, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
+     __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minD_masked(vRegD dst, vRegD src1, vReg src2, pRegGov pg) %{
+   // Masked MinReductionV for T_DOUBLE lanes when the vector is exactly
+   // MaxVectorSize bytes; the mask operand pg governs the reduction directly.
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
+   match(Set dst (MinReductionV (Binary src1 src2) pg));
+   // fminv writes dst before fmind reads src1, so dst must not be assigned the
+   // same register as an input; the unmasked reduce_minD declares the same effect.
+   effect(TEMP_DEF dst);
+   ins_cost(SVE_COST);
+   format %{ "sve_reduce_minD $dst, $src1, $pg, $src2\t# minD reduction predicated (sve)" %}
+   ins_encode %{
+     // Minimum across src2 under pg, then fold in the scalar input src1.
+     __ sve_fminv(as_FloatRegister($dst$$reg), __ D, as_PRegister($pg$$reg), as_FloatRegister($src2$$reg));
      __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct reduce_minF_masked_partial(vRegF dst, vRegF src1, vReg src2, pRegGov pg,
!                                     pRegGov ptmp, rFlagsReg cr) %{  // masked MinReductionV: T_FLOAT lanes, vector shorter than MaxVectorSize
!   predicate(UseSVE > 0 &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT &&
!             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
!   match(Set dst (MinReductionV (Binary src1 src2) pg));
+   effect(TEMP_DEF dst, TEMP ptmp, KILL cr);  // dst is written by fminv before src1 is read by fmins
+   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_minF $dst, $src1, $pg, $src2\t# minF reduction predicated partial (sve)" %}
+   ins_encode %{
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,  // ptmp is built from the lane count of src2
+                           Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),  // combine ptmp with the mask pg (presumably ptmp := ptmp AND pg)
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_fminv(as_FloatRegister($dst$$reg), __ S,  // minimum across src2 under the combined predicate
+                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
+     __ fmins(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));  // fold in the scalar input src1
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct reduce_minD_masked_partial(vRegD dst, vRegD src1, vReg src2, pRegGov pg,
+                                     pRegGov ptmp, rFlagsReg cr) %{  // masked MinReductionV: T_DOUBLE lanes, vector shorter than MaxVectorSize
+   predicate(UseSVE > 0 &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE &&
+             n->in(1)->in(2)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
+   match(Set dst (MinReductionV (Binary src1 src2) pg));
    effect(TEMP_DEF dst, TEMP ptmp, KILL cr);  // dst is written by fminv before src1 is read by fmind
!   ins_cost(3 * SVE_COST);
+   format %{ "sve_reduce_minD $dst, $src1, $pg, $src2\t# minD reduction predicated partial (sve)" %}
    ins_encode %{
      // ptmp is built from the lane count of src2.
      __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src2));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),  // combine ptmp with the mask pg (presumably ptmp := ptmp AND pg)
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
      // Minimum across src2 under the combined predicate, then fold in src1.
      __ sve_fminv(as_FloatRegister($dst$$reg), __ D,
!                as_PRegister($ptmp$$reg), as_FloatRegister($src2$$reg));
      __ fmind(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  

*** 2464,143 ***
           as_FloatRegister($src$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
    predicate(UseSVE > 0);
!   match(Set dst (URShiftVL src (RShiftCntV shift)));
    ins_cost(SVE_COST);
!   format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     if (con == 0) {
!       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-            as_FloatRegister($src$$reg));
-       return;
-     }
-     __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
-          as_FloatRegister($src$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
    predicate(UseSVE > 0);
!   match(Set dst (LShiftVB src (LShiftCntV shift)));
    ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     if (con >= 8) {
!       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-            as_FloatRegister($src$$reg));
-       return;
-     }
-     __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
-          as_FloatRegister($src$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
    predicate(UseSVE > 0);
!   match(Set dst (LShiftVS src (LShiftCntV shift)));
    ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     if (con >= 16) {
!       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
-            as_FloatRegister($src$$reg));
-       return;
-     }
-     __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
-          as_FloatRegister($src$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
    predicate(UseSVE > 0);
!   match(Set dst (LShiftVI src (LShiftCntV shift)));
    ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
!          as_FloatRegister($src$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
    predicate(UseSVE > 0);
!   match(Set dst (LShiftVL src (LShiftCntV shift)));
    ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
!          as_FloatRegister($src$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
!   predicate(UseSVE > 0 &&
!             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
!   match(Set dst (LShiftCntV cnt));
!   match(Set dst (RShiftCntV cnt));
-   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
!   predicate(UseSVE > 0 &&
!             (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
!             (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
!   match(Set dst (LShiftCntV cnt));
-   match(Set dst (RShiftCntV cnt));
-   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
!   predicate(UseSVE > 0 &&
!             (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
!   match(Set dst (LShiftCntV cnt));
!   match(Set dst (RShiftCntV cnt));
-   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
!   predicate(UseSVE > 0 &&
!             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
!   match(Set dst (LShiftCntV cnt));
!   match(Set dst (RShiftCntV cnt));
-   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector sqrt
  
  instruct vsqrtF(vReg dst, vReg src) %{
!   predicate(UseSVE > 0);
    match(Set dst (SqrtVF src));
    ins_cost(SVE_COST);
    format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S,
--- 3655,458 ---
           as_FloatRegister($src$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{  // URShiftVL by a constant count (D lanes)
+   predicate(UseSVE > 0);
+   match(Set dst (URShiftVL src (RShiftCntV shift)));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;
+     if (con == 0) {  // zero count: no lsr is emitted, src is copied via orr with itself
+       __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+            as_FloatRegister($src$$reg));
+       return;
+     }
+     __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+          as_FloatRegister($src$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslB_imm(vReg dst, vReg src, immI shift) %{  // LShiftVB by a constant count (B lanes)
+   predicate(UseSVE > 0);
+   match(Set dst (LShiftVB src (LShiftCntV shift)));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;
+     if (con >= 8) {  // count of 8 or more: no lsl is emitted, eor of src with itself gives zero
+       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+            as_FloatRegister($src$$reg));
+       return;
+     }
+     __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+          as_FloatRegister($src$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslS_imm(vReg dst, vReg src, immI shift) %{  // LShiftVS by a constant count (H lanes)
+   predicate(UseSVE > 0);
+   match(Set dst (LShiftVS src (LShiftCntV shift)));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;
+     if (con >= 16) {  // count of 16 or more: no lsl is emitted, eor of src with itself gives zero
+       __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+            as_FloatRegister($src$$reg));
+       return;
+     }
+     __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+          as_FloatRegister($src$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslI_imm(vReg dst, vReg src, immI shift) %{  // LShiftVI by a constant count (S lanes)
+   predicate(UseSVE > 0);
+   match(Set dst (LShiftVI src (LShiftCntV shift)));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;  // passed through unchecked, unlike the B and H variants
+     __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+          as_FloatRegister($src$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslL_imm(vReg dst, vReg src, immI shift) %{  // LShiftVL by a constant count (D lanes)
+   predicate(UseSVE > 0);
+   match(Set dst (LShiftVL src (LShiftCntV shift)));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;  // passed through unchecked, unlike the B and H variants
+     __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+          as_FloatRegister($src$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{  // shift-count vector for T_BYTE lanes; matches both left and right count nodes
+   predicate(UseSVE > 0 &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
+   match(Set dst (LShiftCntV cnt));
+   match(Set dst (RShiftCntV cnt));
+   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
+   ins_encode %{
+     __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));  // dup of the scalar count into dst with B element size
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{  // shift-count vector for T_SHORT or T_CHAR lanes; matches both left and right count nodes
+   predicate(UseSVE > 0 &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+             (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
+   match(Set dst (LShiftCntV cnt));
+   match(Set dst (RShiftCntV cnt));
+   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
+   ins_encode %{
+     __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));  // dup of the scalar count into dst with H element size
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{  // shift-count vector for T_INT lanes; matches both left and right count nodes
+   predicate(UseSVE > 0 &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+   match(Set dst (LShiftCntV cnt));
+   match(Set dst (RShiftCntV cnt));
+   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
+   ins_encode %{
+     __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));  // dup of the scalar count into dst with S element size
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{  // shift-count vector for T_LONG lanes; matches both left and right count nodes
+   predicate(UseSVE > 0 &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+   match(Set dst (LShiftCntV cnt));
+   match(Set dst (RShiftCntV cnt));
+   format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
+   ins_encode %{
+     __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));  // dup of the scalar count into dst with D element size
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // vector shift - predicated
+ 
+ instruct vasrB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked RShiftVB (B lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (RShiftVB (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+   ins_encode %{
+     __ sve_asr(as_FloatRegister($dst_src1$$reg), __ B,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vasrS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked RShiftVS (H lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (RShiftVS (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+   ins_encode %{
+     __ sve_asr(as_FloatRegister($dst_src1$$reg), __ H,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vasrI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked RShiftVI (S lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (RShiftVI (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_asr(as_FloatRegister($dst_src1$$reg), __ S,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vasrL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked RShiftVL (D lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (RShiftVL (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_asr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_asr(as_FloatRegister($dst_src1$$reg), __ D,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked LShiftVB (B lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (LShiftVB (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+   ins_encode %{
+     __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ B,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked LShiftVS (H lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (LShiftVS (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+   ins_encode %{
+     __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ H,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked LShiftVI (S lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (LShiftVI (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ S,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlslL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked LShiftVL (D lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (LShiftVL (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsl $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_lsl(as_FloatRegister($dst_src1$$reg), __ D,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlsrB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked URShiftVB (B lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (URShiftVB (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
+   ins_encode %{
+     __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ B,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlsrS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked URShiftVS (H lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (URShiftVS (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+   ins_encode %{
+     __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ H,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlsrI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked URShiftVI (S lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (URShiftVI (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ S,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vlsrL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{  // masked URShiftVL (D lanes); dst_src1 is both the first input and the result
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (URShiftVL (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_lsr $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_lsr(as_FloatRegister($dst_src1$$reg), __ D,  // shift amounts come from vector src2, governed by pg
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vasrB_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked RShiftVB by a constant count (B lanes); dst_src is input and result
+   predicate(UseSVE > 0);
+   match(Set dst_src (RShiftVB (Binary dst_src (RShiftCntV shift)) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (B)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;
+     assert(con > 0 && con < 8, "invalid shift immediate");  // only counts 1..7 are accepted; no fallback for 0 or 8 and above
+     __ sve_asr(as_FloatRegister($dst_src$$reg), __ B, as_PRegister($pg$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vasrS_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked RShiftVS by a constant count (H lanes); dst_src is input and result
+   predicate(UseSVE > 0);
+   match(Set dst_src (RShiftVS (Binary dst_src (RShiftCntV shift)) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (H)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;
+     assert(con > 0 && con < 16, "invalid shift immediate");  // only counts 1..15 are accepted; no fallback for 0 or 16 and above
+     __ sve_asr(as_FloatRegister($dst_src$$reg), __ H, as_PRegister($pg$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vasrI_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked RShiftVI by a constant count (S lanes); dst_src is input and result
+   predicate(UseSVE > 0);
+   match(Set dst_src (RShiftVI (Binary dst_src (RShiftCntV shift)) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (S)" %}
+   ins_encode %{
+     int con = (int)$shift$$constant;
+     assert(con > 0 && con < 32, "invalid shift immediate");  // only counts 1..31 are accepted; no fallback for 0 or 32 and above
+     __ sve_asr(as_FloatRegister($dst_src$$reg), __ S, as_PRegister($pg$$reg), con);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vasrL_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked RShiftVL by a constant count (D lanes); dst_src is input and result
    predicate(UseSVE > 0);
!   match(Set dst_src (RShiftVL (Binary dst_src (RShiftCntV shift)) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_asr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (D)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     assert(con > 0 && con < 64, "invalid shift immediate");  // only counts 1..63 are accepted; no fallback for 0 or 64 and above
!     __ sve_asr(as_FloatRegister($dst_src$$reg), __ D, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlsrB_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked URShiftVB by a constant count (B lanes); dst_src is input and result
    predicate(UseSVE > 0);
!   match(Set dst_src (URShiftVB (Binary dst_src (RShiftCntV shift)) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (B)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     assert(con > 0 && con < 8, "invalid shift immediate");  // only counts 1..7 are accepted; no fallback for 0 or 8 and above
!     __ sve_lsr(as_FloatRegister($dst_src$$reg), __ B, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlsrS_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked URShiftVS by a constant count (H lanes); dst_src is input and result
    predicate(UseSVE > 0);
!   match(Set dst_src (URShiftVS (Binary dst_src (RShiftCntV shift)) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (H)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     assert(con > 0 && con < 16, "invalid shift immediate");  // only counts 1..15 are accepted; no fallback for 0 or 16 and above
!     __ sve_lsr(as_FloatRegister($dst_src$$reg), __ H, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlsrI_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked URShiftVI by a constant count (S lanes); dst_src is input and result
    predicate(UseSVE > 0);
!   match(Set dst_src (URShiftVI (Binary dst_src (RShiftCntV shift)) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (S)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     assert(con > 0 && con < 32, "invalid shift immediate");  // only counts 1..31 are accepted; no fallback for 0 or 32 and above
!     __ sve_lsr(as_FloatRegister($dst_src$$reg), __ S, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlsrL_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked URShiftVL by a constant count (D lanes); dst_src is input and result
    predicate(UseSVE > 0);
!   match(Set dst_src (URShiftVL (Binary dst_src (RShiftCntV shift)) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_lsr $dst_src, $pg, $dst_src, $shift\t# vector (sve) (D)" %}
    ins_encode %{
      int con = (int)$shift$$constant;
!     assert(con > 0 && con < 64, "invalid shift immediate");  // only counts 1..63 are accepted; no fallback for 0 or 64 and above
!     __ sve_lsr(as_FloatRegister($dst_src$$reg), __ D, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslB_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked LShiftVB by a constant count (B lanes); dst_src is input and result
!   predicate(UseSVE > 0);
!   match(Set dst_src (LShiftVB (Binary dst_src (LShiftCntV shift)) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (B)" %}
    ins_encode %{
!     int con = (int)$shift$$constant;
+     assert(con >= 0 && con < 8, "invalid shift immediate");  // counts 0..7 are accepted; no fallback for 8 and above
+     __ sve_lsl(as_FloatRegister($dst_src$$reg), __ B, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslS_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked LShiftVS by a constant count (H lanes); dst_src is input and result
!   predicate(UseSVE > 0);
!   match(Set dst_src (LShiftVS (Binary dst_src (LShiftCntV shift)) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (H)" %}
    ins_encode %{
!     int con = (int)$shift$$constant;
+     assert(con >= 0 && con < 16, "invalid shift immediate");  // counts 0..15 are accepted; no fallback for 16 and above
+     __ sve_lsl(as_FloatRegister($dst_src$$reg), __ H, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslI_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{  // masked LShiftVI by a constant count (S lanes); dst_src is input and result
!   predicate(UseSVE > 0);
!   match(Set dst_src (LShiftVI (Binary dst_src (LShiftCntV shift)) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (S)" %}
    ins_encode %{
!     int con = (int)$shift$$constant;
+     assert(con >= 0 && con < 32, "invalid shift immediate");  // counts 0..31 are accepted; no fallback for 32 and above
+     __ sve_lsl(as_FloatRegister($dst_src$$reg), __ S, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vlslL_imm_masked(vReg dst_src, immI shift, pRegGov pg) %{
    // Predicated left shift by immediate on D-sized elements: matches LShiftVL
    // with an LShiftCntV constant count under governing predicate $pg.
    // $dst_src is both the shifted operand and the result.
!   predicate(UseSVE > 0);
!   match(Set dst_src (LShiftVL (Binary dst_src (LShiftCntV shift)) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_lsl $dst_src, $pg, $dst_src, $shift\t# vector (sve) (D)" %}
    ins_encode %{
!     int con = (int)$shift$$constant;
      // Debug check: the shift count is expected to be in 0..63 here.
+     assert(con >= 0 && con < 64, "invalid shift immediate");
+     __ sve_lsl(as_FloatRegister($dst_src$$reg), __ D, as_PRegister($pg$$reg), con);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector sqrt
  
  instruct vsqrtF(vReg dst, vReg src) %{
!   predicate(UseSVE > 0 &&
+             !n->as_Vector()->is_predicated_vector());
    match(Set dst (SqrtVF src));
    ins_cost(SVE_COST);
    format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
    ins_encode %{
      __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S,

*** 2608,21 ***
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vsqrtD(vReg dst, vReg src) %{
    // Pre-change form of the rule: SqrtVD of $src into $dst, selected whenever
    // SVE is enabled. The encoding passes ptrue as the governing predicate.
!   predicate(UseSVE > 0);
    match(Set dst (SqrtVD src));
    ins_cost(SVE_COST);
    format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D,
           ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // vector sub
  
  instruct vsubB(vReg dst, vReg src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst (SubVB src1 src2));
--- 4114,50 ---
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vsqrtD(vReg dst, vReg src) %{
    // SqrtVD of $src into $dst. The predicate excludes nodes that are
    // predicated vector operations; the encoding passes ptrue as the
    // governing predicate.
!   predicate(UseSVE > 0 &&
+             !n->as_Vector()->is_predicated_vector());
    match(Set dst (SqrtVD src));
    ins_cost(SVE_COST);
    format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
    ins_encode %{
      __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D,
           ptrue, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ // vector sqrt - predicated
+ 
+ instruct vsqrtF_masked(vReg dst_src, pRegGov pg) %{
    // Predicated square root on S-sized elements: matches SqrtVF with a mask
    // input and emits sve_fsqrt governed by $pg. $dst_src is both the source
    // and the destination register.
+   predicate(UseSVE > 0);
+   match(Set dst_src (SqrtVF dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fsqrt $dst_src, $pg, $dst_src\t# vector (sve) (S)" %}
+   ins_encode %{
+     __ sve_fsqrt(as_FloatRegister($dst_src$$reg), __ S,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct vsqrtD_masked(vReg dst_src, pRegGov pg) %{
    // Predicated square root on D-sized elements: matches SqrtVD with a mask
    // input and emits sve_fsqrt governed by $pg. $dst_src is both the source
    // and the destination register.
+   predicate(UseSVE > 0);
+   match(Set dst_src (SqrtVD dst_src pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_fsqrt $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
+   ins_encode %{
+     __ sve_fsqrt(as_FloatRegister($dst_src$$reg), __ D,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($dst_src$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
  // vector sub
  
  instruct vsubB(vReg dst, vReg src1, vReg src2) %{
    predicate(UseSVE > 0);
    match(Set dst (SubVB src1 src2));

*** 2699,179 ***
           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector mask cast
  
! instruct vmaskcast(vReg dst) %{
    // Pre-change form: VectorMaskCast on a vector register when input and
    // result have the same lane count and the same size in bytes. Nothing is
    // emitted, so the rule has cost 0 and an empty encoding.
!   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
!             n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
!   match(Set dst (VectorMaskCast dst));
!   ins_cost(0);
-   format %{ "vmaskcast $dst\t# empty (sve)" %}
    ins_encode %{
!     // empty
    %}
!   ins_pipe(pipe_class_empty);
  %}
  
! // ------------------------------ Vector cast -------------------------------
  
! instruct vcvtBtoS(vReg dst, vReg src)
! %{
    // Pre-change form: VectorCastB2X when the result element type is T_SHORT.
    // Emits one sve_sunpklo (H variant) from $src into $dst.
!   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
-   match(Set dst (VectorCastB2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_sunpklo  $dst, H, $src\t# convert B to S vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtBtoI(vReg dst, vReg src)
! %{
    // Pre-change form: VectorCastB2X when the result element type is T_INT.
    // Emits sve_sunpklo twice: H from $src, then S in place on $dst.
!   predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
!   match(Set dst (VectorCastB2X src));
-   ins_cost(2 * SVE_COST);
-   format %{ "sve_sunpklo  $dst, H, $src\n\t"
-             "sve_sunpklo  $dst, S, $dst\t# convert B to I vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtBtoL(vReg dst, vReg src)
! %{
    // Pre-change form: VectorCastB2X when the result element type is T_LONG.
    // Emits sve_sunpklo three times: H from $src, then S and D in place on $dst.
!   predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
!   match(Set dst (VectorCastB2X src));
-   ins_cost(3 * SVE_COST);
-   format %{ "sve_sunpklo  $dst, H, $src\n\t"
-             "sve_sunpklo  $dst, S, $dst\n\t"
-             "sve_sunpklo  $dst, D, $dst\t# convert B to L vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtBtoF(vReg dst, vReg src)
! %{
    // Pre-change form: VectorCastB2X when the result element type is T_FLOAT.
    // Emits sve_sunpklo (H, then S) followed by an in-place sve_scvtf on S
    // elements with ptrue as the governing predicate.
!   predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
!   match(Set dst (VectorCastB2X src));
-   ins_cost(3 * SVE_COST);
-   format %{ "sve_sunpklo  $dst, H, $src\n\t"
-             "sve_sunpklo  $dst, S, $dst\n\t"
-             "sve_scvtf  $dst, S, $dst, S\t# convert B to F vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
!     __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtBtoD(vReg dst, vReg src)
! %{
    // Pre-change form: VectorCastB2X when the result element type is T_DOUBLE.
    // Emits sve_sunpklo (H, S, then D) followed by an in-place sve_scvtf on D
    // elements with ptrue as the governing predicate.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
!   match(Set dst (VectorCastB2X src));
!   ins_cost(4 * SVE_COST);
!   format %{ "sve_sunpklo  $dst, H, $src\n\t"
!             "sve_sunpklo  $dst, S, $dst\n\t"
!             "sve_sunpklo  $dst, D, $dst\n\t"
!             "sve_scvtf  $dst, D, $dst, D\t# convert B to D vector" %}
-   ins_encode %{
-     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
-     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
-     __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
    %}
!   ins_pipe(pipe_slow);
  %}
  
! instruct vcvtStoB(vReg dst, vReg src, vReg tmp)
  %{
    // Pre-change form: VectorCastS2X when the result element type is T_BYTE.
    // $tmp (a TEMP) is filled with zero by sve_dup and used as the second
    // operand of a B-variant sve_uzp1 on $src.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
!   match(Set dst (VectorCastS2X src));
!   effect(TEMP tmp);
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_dup  $tmp, B, 0\n\t"
!             "sve_uzp1  $dst, B, $src, tmp\t# convert S to B vector" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ B, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtStoI(vReg dst, vReg src)
  %{
    // Pre-change form: VectorCastS2X when the result element type is T_INT.
    // Emits one sve_sunpklo (S variant) from $src into $dst.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
!   match(Set dst (VectorCastS2X src));
!   ins_cost(SVE_COST);
!   format %{ "sve_sunpklo  $dst, S, $src\t# convert S to I vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtStoL(vReg dst, vReg src)
  %{
    // Pre-change form: VectorCastS2X when the result element type is T_LONG.
    // Emits sve_sunpklo twice: S from $src, then D in place on $dst.
!   predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
-   match(Set dst (VectorCastS2X src));
    ins_cost(2 * SVE_COST);
!   format %{ "sve_sunpklo  $dst, S, $src\n\t"
-             "sve_sunpklo  $dst, D, $dst\t# convert S to L vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtStoF(vReg dst, vReg src)
  %{
    // Pre-change form: VectorCastS2X when the result element type is T_FLOAT.
    // Emits sve_sunpklo (S) from $src, then an in-place sve_scvtf on S
    // elements with ptrue as the governing predicate.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorCastS2X src));
    ins_cost(2 * SVE_COST);
!   format %{ "sve_sunpklo  $dst, S, $src\n\t"
-             "sve_scvtf  $dst, S, $dst, S\t# convert S to F vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
!     __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtStoD(vReg dst, vReg src)
  %{
    // Pre-change form: VectorCastS2X when the result element type is T_DOUBLE.
    // Emits sve_sunpklo (S, then D) followed by an in-place sve_scvtf on D
    // elements with ptrue as the governing predicate.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (VectorCastS2X src));
!   ins_cost(3 * SVE_COST);
!   format %{ "sve_sunpklo  $dst, S, $src\n\t"
-             "sve_sunpklo  $dst, D, $dst\n\t"
-             "sve_scvtf  $dst, D, $dst, D\t# convert S to D vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
!     __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoB(vReg dst, vReg src, vReg tmp)
--- 4234,185 ---
           as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // vector sub - predicated
  
! instruct vsubB_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated subtract on B-sized elements: matches SubVB of
    // ($dst_src1, $src2) under governing predicate $pg. $dst_src1 is both the
    // first operand and the result.
!   predicate(UseSVE > 0);
!   match(Set dst_src1 (SubVB (Binary dst_src1 src2) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (B)" %}
    ins_encode %{
!     __ sve_sub(as_FloatRegister($dst_src1$$reg), __ B,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
    %}
!   ins_pipe(pipe_slow);
  %}
  
! instruct vsubS_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated subtract on H-sized elements: matches SubVS of
    // ($dst_src1, $src2) under governing predicate $pg. $dst_src1 is both the
    // first operand and the result.
+   predicate(UseSVE > 0);
+   match(Set dst_src1 (SubVS (Binary dst_src1 src2) pg));
+   ins_cost(SVE_COST);
+   format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (H)" %}
+   ins_encode %{
+     __ sve_sub(as_FloatRegister($dst_src1$$reg), __ H,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
  
! instruct vsubI_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated subtract on S-sized elements: matches SubVI of
    // ($dst_src1, $src2) under governing predicate $pg. $dst_src1 is both the
    // first operand and the result.
!   predicate(UseSVE > 0);
!   match(Set dst_src1 (SubVI (Binary dst_src1 src2) pg));
    ins_cost(SVE_COST);
!   format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
    ins_encode %{
!     __ sve_sub(as_FloatRegister($dst_src1$$reg), __ S,
+             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vsubL_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated subtract on D-sized elements: matches SubVL of
    // ($dst_src1, $src2) under governing predicate $pg. $dst_src1 is both the
    // first operand and the result.
!   predicate(UseSVE > 0);
!   match(Set dst_src1 (SubVL (Binary dst_src1 src2) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_sub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
    ins_encode %{
!     __ sve_sub(as_FloatRegister($dst_src1$$reg), __ D,
!             as_PRegister($pg$$reg),
+             as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vsubF_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated floating-point subtract on S-sized elements: matches SubVF of
    // ($dst_src1, $src2) under governing predicate $pg and emits sve_fsub.
    // $dst_src1 is both the first operand and the result.
!   predicate(UseSVE > 0);
!   match(Set dst_src1 (SubVF (Binary dst_src1 src2) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_fsub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (S)" %}
    ins_encode %{
!     __ sve_fsub(as_FloatRegister($dst_src1$$reg), __ S,
!             as_PRegister($pg$$reg),
!             as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vsubD_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
    // Predicated floating-point subtract on D-sized elements: matches SubVD of
    // ($dst_src1, $src2) under governing predicate $pg and emits sve_fsub.
    // $dst_src1 is both the first operand and the result.
!   predicate(UseSVE > 0);
!   match(Set dst_src1 (SubVD (Binary dst_src1 src2) pg));
!   ins_cost(SVE_COST);
!   format %{ "sve_fsub $dst_src1, $pg, $dst_src1, $src2\t# vector (sve) (D)" %}
    ins_encode %{
!     __ sve_fsub(as_FloatRegister($dst_src1$$reg), __ D,
!             as_PRegister($pg$$reg),
!             as_FloatRegister($src2$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // ------------------------------ Vector mask cast --------------------------
! 
+ instruct vmaskcast(pRegGov dst_src) %{
    // VectorMaskCast on a predicate register when input and result have the
    // same lane count and the same size in bytes. The same register serves as
    // input and output, so nothing is emitted (cost 0, empty encoding).
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length() &&
!             n->bottom_type()->is_vect()->length_in_bytes() == n->in(1)->bottom_type()->is_vect()->length_in_bytes());
!   match(Set dst_src (VectorMaskCast dst_src));
!   ins_cost(0);
!   format %{ "vmaskcast $dst_src\t# empty (sve)" %}
!   ins_encode %{
!     // empty
    %}
!   ins_pipe(pipe_class_empty);
  %}
  
! instruct vmaskcast_extend(pRegGov dst, pReg src)
  %{
    // VectorMaskCast where the result vector is 2x, 4x or 8x the input's
    // length in bytes. Delegates to sve_vmaskcast_extend, passing the result
    // and source byte lengths.
    predicate(UseSVE > 0 &&
!             (Matcher::vector_length_in_bytes(n) == 2 * Matcher::vector_length_in_bytes(n->in(1)) ||
!              Matcher::vector_length_in_bytes(n) == 4 * Matcher::vector_length_in_bytes(n->in(1)) ||
!              Matcher::vector_length_in_bytes(n) == 8 * Matcher::vector_length_in_bytes(n->in(1))));
!   match(Set dst (VectorMaskCast src));
!   ins_cost(SVE_COST * 3);
!   format %{ "sve_vmaskcast_extend  $dst, $src\t# extend predicate $src" %}
    ins_encode %{
!     __ sve_vmaskcast_extend(as_PRegister($dst$$reg), as_PRegister($src$$reg),
!                             Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmaskcast_narrow(pRegGov dst, pReg src)
  %{
    // VectorMaskCast where the input vector is 2x, 4x or 8x the result's
    // length in bytes. Delegates to sve_vmaskcast_narrow, passing the result
    // and source byte lengths.
    predicate(UseSVE > 0 &&
!             (Matcher::vector_length_in_bytes(n) * 2 == Matcher::vector_length_in_bytes(n->in(1)) ||
!              Matcher::vector_length_in_bytes(n) * 4 == Matcher::vector_length_in_bytes(n->in(1)) ||
!              Matcher::vector_length_in_bytes(n) * 8 == Matcher::vector_length_in_bytes(n->in(1))));
!   match(Set dst (VectorMaskCast src));
+   ins_cost(SVE_COST * 3);
+   format %{ "sve_vmaskcast_narrow  $dst, $src\t# narrow predicate $src" %}
    ins_encode %{
!     __ sve_vmaskcast_narrow(as_PRegister($dst$$reg), as_PRegister($src$$reg),
+                             Matcher::vector_length_in_bytes(this), Matcher::vector_length_in_bytes(this, $src));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // ------------------------------ Vector cast -------------------------------
+ 
+ instruct vcvtBtoX_extend(vReg dst, vReg src)
  %{
    // VectorCastB2X: the target element size is derived from this node's
    // element type. sve_vector_extend goes from B to that size; for T_FLOAT
    // and T_DOUBLE targets an in-place sve_scvtf (ptrue-governed) follows.
!   predicate(UseSVE > 0);
!   match(Set dst (VectorCastB2X src));
    ins_cost(2 * SVE_COST);
!   format %{ "sve_vectorcast_b2x  $dst, $src\t# convert B to X vector (extend)" %}
    ins_encode %{
!     BasicType to_bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
+     __ sve_vector_extend(as_FloatRegister($dst$$reg), to_size, as_FloatRegister($src$$reg), __ B);
+     if (to_bt == T_FLOAT || to_bt == T_DOUBLE) {
+       __ sve_scvtf(as_FloatRegister($dst$$reg), to_size, ptrue, as_FloatRegister($dst$$reg), to_size);
+     }
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtStoB(vReg dst, vReg src, vReg tmp)
  %{
    // VectorCastS2X when the result element type is T_BYTE. Delegates to
    // sve_vector_narrow (H to B), handing it $tmp, which is declared TEMP.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
    match(Set dst (VectorCastS2X src));
+   effect(TEMP tmp);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_vectorcast_s2b  $dst, $src\t# convert H to B vector" %}
    ins_encode %{
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B,
!                          as_FloatRegister($src$$reg), __ H, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtStoX_extend(vReg dst, vReg src)
  %{
    // VectorCastS2X when the result element is wider than 2 bytes.
    // sve_vector_extend goes from H to the target size; for T_FLOAT and
    // T_DOUBLE targets an in-place sve_scvtf (ptrue-governed) follows.
    predicate(UseSVE > 0 &&
!             type2aelembytes(Matcher::vector_element_basic_type(n)) > 2);
    match(Set dst (VectorCastS2X src));
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_vectorcast_s2x  $dst, $src\t# convert H to X vector (extend)" %}
    ins_encode %{
!     BasicType to_bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
!     __ sve_vector_extend(as_FloatRegister($dst$$reg), to_size, as_FloatRegister($src$$reg), __ H);
+     if (to_bt == T_FLOAT || to_bt == T_DOUBLE) {
+       __ sve_scvtf(as_FloatRegister($dst$$reg), to_size, ptrue, as_FloatRegister($dst$$reg), to_size);
+     }
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoB(vReg dst, vReg src, vReg tmp)

*** 2879,17 ***
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
    match(Set dst (VectorCastI2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_dup  $tmp, H, 0\n\t"
-             "sve_uzp1  $dst, H, $src, tmp\n\t"
-             "sve_uzp1  $dst, B, $dst, tmp\n\t# convert I to B vector" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoS(vReg dst, vReg src, vReg tmp)
--- 4420,14 ---
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
    match(Set dst (VectorCastI2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_vectorcast_i2b  $dst, $src\t# convert I to B vector" %}
    ins_encode %{
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ B,
!                          as_FloatRegister($src$$reg), __ S, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoS(vReg dst, vReg src, vReg tmp)

*** 2897,39 ***
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
    match(Set dst (VectorCastI2X src));
    effect(TEMP tmp);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_dup  $tmp, H, 0\n\t"
-             "sve_uzp1  $dst, H, $src, tmp\t# convert I to S vector" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoL(vReg dst, vReg src)
  %{
    // Pre-change form: VectorCastI2X when the result element type is T_LONG.
    // Emits one sve_sunpklo (D variant) from $src into $dst.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (VectorCastI2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_sunpklo  $dst, D, $src\t# convert I to L vector" %}
    ins_encode %{
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoF(vReg dst, vReg src)
  %{
    // Pre-change form: VectorCastI2X when the result element type is T_FLOAT.
    // Emits one sve_scvtf on S elements with ptrue as the governing predicate.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorCastI2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_scvtf  $dst, S, $src, S\t# convert I to F vector" %}
    ins_encode %{
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    %}
    ins_pipe(pipe_slow);
  %}
--- 4435,38 ---
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
    match(Set dst (VectorCastI2X src));
    effect(TEMP tmp);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_vectorcast_i2s $dst, $src\t# convert I to H vector" %}
    ins_encode %{
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ H,
!                          as_FloatRegister($src$$reg), __ S, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoL(vReg dst, vReg src)
  %{
    // VectorCastI2X when the result element type is T_LONG. Delegates to
    // sve_vector_extend, going from S-sized to D-sized elements.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (VectorCastI2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_vectorcast_i2l  $dst, $src\t# convert I to L vector" %}
    ins_encode %{
!     __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtItoF(vReg dst, vReg src)
  %{
    // VectorCastI2X when the result element type is T_FLOAT. Emits one
    // sve_scvtf on S elements with ptrue as the governing predicate; only the
    // format text differs from the instruction actually emitted.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorCastI2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_vectorcast_i2f  $dst, $src\t# convert I to F vector" %}
    ins_encode %{
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    %}
    ins_pipe(pipe_slow);
  %}

*** 2938,69 ***
  %{
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (VectorCastI2X src));
    ins_cost(2 * SVE_COST);
!   format %{ "sve_sunpklo  $dst, D, $src\n\t"
-             "sve_scvtf  $dst, D, $dst, D\t# convert I to D vector" %}
    ins_encode %{
      __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtLtoB(vReg dst, vReg src, vReg tmp)
- %{
    // Pre-change form: VectorCastL2X when the result element type is T_BYTE.
    // $tmp is filled with zero by sve_dup, then sve_uzp1 is applied three
    // times (S from $src, then H and B in place on $dst) with $tmp as the
    // second operand.
-   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
-   match(Set dst (VectorCastL2X src));
-   effect(TEMP_DEF dst, TEMP tmp);
-   ins_cost(4 * SVE_COST);
-   format %{ "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $src, tmp\n\t"
-             "sve_uzp1  $dst, H, $dst, tmp\n\t"
-             "sve_uzp1  $dst, B, $dst, tmp\n\t# convert L to B vector" %}
-   ins_encode %{
-     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vcvtLtoS(vReg dst, vReg src, vReg tmp)
  %{
    // Pre-change form: VectorCastL2X when the result element type is T_SHORT.
    // $tmp is filled with zero by sve_dup, then sve_uzp1 is applied twice
    // (S from $src, then H in place on $dst) with $tmp as the second operand.
!   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
    match(Set dst (VectorCastL2X src));
    effect(TEMP_DEF dst, TEMP tmp);
-   ins_cost(3 * SVE_COST);
-   format %{ "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $src, tmp\n\t"
-             "sve_uzp1  $dst, H, $dst, tmp\n\t# convert L to S vector" %}
-   ins_encode %{
-     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vcvtLtoI(vReg dst, vReg src, vReg tmp)
- %{
    // Pre-change form: VectorCastL2X when the result element type is T_INT.
    // $tmp is filled with zero by sve_dup and used as the second operand of
    // one S-variant sve_uzp1 on $src.
-   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
-   match(Set dst (VectorCastL2X src));
-   effect(TEMP tmp);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $src, tmp\t# convert L to I vector" %}
    ins_encode %{
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtLtoF(vReg dst, vReg src, vReg tmp)
--- 4475,30 ---
  %{
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (VectorCastI2X src));
    ins_cost(2 * SVE_COST);
!   format %{ "sve_vectorcast_i2d  $dst, $src\t# convert I to D vector" %}
    ins_encode %{
      __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtLtoX_narrow(vReg dst, vReg src, vReg tmp)
  %{
    // VectorCastL2X when the result element type is integral. The target size
    // is derived from this node's element type and passed to
    // sve_vector_narrow (from D) together with $tmp, which is declared TEMP.
!   predicate(UseSVE > 0 && is_integral_type(Matcher::vector_element_basic_type(n)));
    match(Set dst (VectorCastL2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_vectorcast_l2x  $dst, $src\t# convert L to B/H/S vector (narrow)" %}
    ins_encode %{
!     BasicType to_bt = Matcher::vector_element_basic_type(this);
!     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
+     __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size,
+                          as_FloatRegister($src$$reg), __ D, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtLtoF(vReg dst, vReg src, vReg tmp)

*** 3008,182 ***
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorCastL2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_scvtf  $dst, S, $src, D\n\t"
-             "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $dst, $tmp\t# convert L to F vector" %}
    ins_encode %{
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtLtoD(vReg dst, vReg src)
  %{
    // Pre-change form: VectorCastL2X when the result element type is T_DOUBLE.
    // Emits one sve_scvtf on D elements with ptrue as the governing predicate.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (VectorCastL2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_scvtf  $dst, D, $src, D\t# convert L to D vector" %}
    ins_encode %{
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtFtoB(vReg dst, vReg src, vReg tmp)
- %{
    // Pre-change form: VectorCastF2X when the result element type is T_BYTE.
    // sve_fcvtzs (S, ptrue-governed) runs first; $tmp is then filled with zero
    // and used as the second operand of two in-place sve_uzp1 steps (H, B).
-   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
-   match(Set dst (VectorCastF2X src));
-   effect(TEMP_DEF dst, TEMP tmp);
-   ins_cost(4 * SVE_COST);
-   format %{ "sve_fcvtzs  $dst, S, $src, S\n\t"
-             "sve_dup  $tmp, H, 0\n\t"
-             "sve_uzp1  $dst, H, $dst, tmp\n\t"
-             "sve_uzp1  $dst, B, $dst, tmp\n\t# convert F to B vector" %}
-   ins_encode %{
-     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
-     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vcvtFtoS(vReg dst, vReg src, vReg tmp)
  %{
    // Pre-change form: VectorCastF2X when the result element type is T_SHORT.
    // sve_fcvtzs (S, ptrue-governed) runs first; $tmp is then filled with zero
    // and used as the second operand of one in-place H-variant sve_uzp1.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
    match(Set dst (VectorCastF2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_fcvtzs  $dst, S, $src, S\n\t"
-             "sve_dup  $tmp, H, 0\n\t"
-             "sve_uzp1  $dst, H, $dst, tmp\t# convert F to S vector" %}
-   ins_encode %{
-     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
-     __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vcvtFtoI(vReg dst, vReg src)
- %{
    // Pre-change form: VectorCastF2X when the result element type is T_INT.
    // Emits one sve_fcvtzs on S elements with ptrue as the governing predicate.
-   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
-   match(Set dst (VectorCastF2X src));
-   ins_cost(SVE_COST);
-   format %{ "sve_fcvtzs  $dst, S, $src, S\t# convert F to I vector" %}
-   ins_encode %{
-     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vcvtFtoL(vReg dst, vReg src)
- %{
    // Pre-change form: VectorCastF2X when the result element type is T_LONG.
    // sve_fcvtzs (S, ptrue-governed) runs first, followed by an in-place
    // D-variant sve_sunpklo on $dst.
-   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
-   match(Set dst (VectorCastF2X src));
-   ins_cost(2 * SVE_COST);
-   format %{ "sve_fcvtzs  $dst, S, $src, S\n\t"
-             "sve_sunpklo  $dst, D, $dst\t# convert F to L vector" %}
    ins_encode %{
      __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
!     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
!   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vcvtFtoD(vReg dst, vReg src)
- %{
    // Pre-change form: VectorCastF2X when the result element type is T_DOUBLE.
    // A D-variant sve_sunpklo from $src runs first, followed by an in-place
    // sve_fcvt from S to D with ptrue as the governing predicate.
-   predicate(UseSVE > 0 &&
-             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
-   match(Set dst (VectorCastF2X src));
-   ins_cost(2 * SVE_COST);
-   format %{ "sve_sunpklo  $dst, D, $src\n\t"
-             "sve_fcvt  $dst, D, $dst, S\t# convert F to D vector" %}
-   ins_encode %{
-     __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
-     __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtDtoB(vReg dst, vReg src, vReg tmp)
  %{
    // Pre-change form: VectorCastD2X when the result element type is T_BYTE.
    // sve_fcvtzs (D, ptrue-governed) runs first; $tmp is then filled with zero
    // and used as the second operand of three in-place sve_uzp1 steps
    // (S, H, B).
!   predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
!   match(Set dst (VectorCastD2X src));
!   effect(TEMP_DEF dst, TEMP tmp);
!   ins_cost(5 * SVE_COST);
!   format %{ "sve_fcvtzs  $dst, D, $src, D\n\t"
-             "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $dst, tmp\n\t"
-             "sve_uzp1  $dst, H, $dst, tmp\n\t"
-             "sve_uzp1  $dst, B, $dst, tmp\n\t# convert D to B vector" %}
    ins_encode %{
!     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtDtoS(vReg dst, vReg src, vReg tmp)
  %{
    // Pre-change form: VectorCastD2X when the result element type is T_SHORT.
    // sve_fcvtzs (D, ptrue-governed) runs first; $tmp is then filled with zero
    // and used as the second operand of two in-place sve_uzp1 steps (S, H).
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
!   match(Set dst (VectorCastD2X src));
!   effect(TEMP_DEF dst, TEMP tmp);
!   ins_cost(4 * SVE_COST);
-   format %{ "sve_fcvtzs  $dst, D, $src, D\n\t"
-             "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $dst, tmp\n\t"
-             "sve_uzp1  $dst, H, $dst, tmp\n\t# convert D to S vector" %}
    ins_encode %{
!     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
-     __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtDtoI(vReg dst, vReg src, vReg tmp)
  %{
    // Pre-change form: VectorCastD2X when the result element type is T_INT.
    // sve_fcvtzs (D, ptrue-governed) runs first; $tmp is then filled with zero
    // and used as the second operand of one in-place S-variant sve_uzp1.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
    match(Set dst (VectorCastD2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_fcvtzs  $dst, D, $src, D\n\t"
-             "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $dst, tmp\t# convert D to I vector" %}
    ins_encode %{
      __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtDtoL(vReg dst, vReg src)
  %{
    // VectorCastD2X rule selected when the result element type is T_LONG.
    // Source and result lanes are both D-sized, so a single fcvtzs is emitted.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (VectorCastD2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_fcvtzs  $dst, D, $src, D\t# convert D to L vector" %}
    ins_encode %{
      __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
    %}
    ins_pipe(pipe_slow);
  %}
--- 4506,111 ---
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorCastL2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_vectorcast_l2f  $dst, $src\t# convert L to F vector" %}
    ins_encode %{
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
!                          as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg));
+ 
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtLtoD(vReg dst, vReg src)
  %{
    // VectorCastL2X rule selected when the result element type is T_DOUBLE.
    // Source and result lanes are both D-sized, so a single scvtf is emitted.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (VectorCastL2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_vectorcast_l2d  $dst, $src\t# convert L to D vector" %}
    ins_encode %{
      __ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtFtoX_narrow(vReg dst, vReg src, vReg tmp)
  %{
    // VectorCastF2X rule selected when the result element type is T_BYTE or
    // T_SHORT. dst is TEMP_DEF and tmp is a scratch vector register.
    predicate(UseSVE > 0 &&
!             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT));
    match(Set dst (VectorCastF2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_vectorcast_f2x  $dst, $src\t# convert F to B/H vector" %}
    ins_encode %{
      // Target lane size is derived from this node's element type.
+     BasicType to_bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
      // Convert in S lanes first, then hand dst to sve_vector_narrow to go
      // from S to to_size, with tmp passed as its scratch register.
      __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size,
!                          as_FloatRegister($dst$$reg), __ S, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtFtoX_extend(vReg dst, vReg src)
  %{
    // VectorCastF2X rule selected when the result element type is T_INT or
    // T_LONG.
!   predicate(UseSVE > 0 &&
!             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
!              n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
!   match(Set dst (VectorCastF2X src));
!   ins_cost(SVE_COST);
!   format %{ "sve_vectorcast_f2x  $dst, $src\t# convert F to I/L vector" %}
    ins_encode %{
!     BasicType to_bt = Matcher::vector_element_basic_type(this);
      // The conversion itself is done in S lanes; only the T_LONG case needs
      // the extra sve_vector_extend step from S to D.
!     __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
!     if (to_bt == T_LONG) {
!       __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg), __ S);
!     }
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtFtoD(vReg dst, vReg src)
  %{
    // VectorCastF2X rule selected when the result element type is T_DOUBLE.
    // The format string now names $src as the source operand; it previously
    // printed $dst twice, which made the debug listing misleading.
    predicate(UseSVE > 0 &&
!             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
!   match(Set dst (VectorCastF2X src));
!   ins_cost(2 * SVE_COST);
!   format %{ "sve_vectorcast_f2d  $dst, $src\t# convert F to D vector" %}
    ins_encode %{
      // Extend src from S to D lanes into dst first, then run fcvt on dst
      // in place (D result from S source).
!     __ sve_vector_extend(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), __ S);
!     __ sve_fcvt(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vcvtDtoX_narrow(vReg dst, vReg src, vReg tmp)
  %{
    // VectorCastD2X rule selected when the result element type is T_BYTE,
    // T_SHORT or T_INT. dst is TEMP_DEF and tmp is a scratch vector register.
    predicate(UseSVE > 0 &&
!             (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_INT));
    match(Set dst (VectorCastD2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_vectorcast_d2x  $dst, $src\t# convert D to X vector (narrow)" %}
    ins_encode %{
      // Target lane size is derived from this node's element type.
+     BasicType to_bt = Matcher::vector_element_basic_type(this);
+     Assembler::SIMD_RegVariant to_size = __ elemType_to_regVariant(to_bt);
      // Convert in D lanes first, then hand dst to sve_vector_narrow to go
      // from D to to_size, with tmp passed as its scratch register.
      __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), to_size,
!                          as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct vcvtDtoL(vReg dst, vReg src)
  %{
    // VectorCastD2X rule selected when the result element type is T_LONG.
    // Source and result lanes are both D-sized, so a single fcvtzs is emitted.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (VectorCastD2X src));
    ins_cost(SVE_COST);
!   format %{ "sve_vectorcast_d2l  $dst, $src\t# convert D to L vector" %}
    ins_encode %{
      __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
    %}
    ins_pipe(pipe_slow);
  %}

*** 3193,402 ***
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorCastD2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_fcvt  $dst, S, $src, D\n\t"
-             "sve_dup  $tmp, S, 0\n\t"
-             "sve_uzp1  $dst, S, $dst, $tmp\t# convert D to F vector" %}
    ins_encode %{
      __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
!     __ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  // ------------------------------ Vector extract ---------------------------------
  
! instruct extractB(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // ExtractB rule: reads the byte lane selected by the constant idx from
    // src into the general register dst. pTmp is a scratch governing
    // predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractB src idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, B, $pTmp, $src, $idx\n\t"
              "sbfmw $dst, $dst, 0U, 7U\t# extract from vector(B)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ B, as_PRegister($pTmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
      // Signed bitfield move over bits 0..7: sign-extends the extracted byte.
      __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 7U);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // ExtractS rule: reads the halfword (H) lane selected by the constant
    // idx from src into the general register dst. pTmp is a scratch
    // governing predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractS src idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, H, $pTmp, $src, $idx\n\t"
              "sbfmw $dst, $dst, 0U, 15U\t# extract from vector(S)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ H, as_PRegister($pTmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
      // Signed bitfield move over bits 0..15: sign-extends the extracted short.
      __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
    %}
    ins_pipe(pipe_slow);
  %}
  
  
! instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // ExtractI rule: reads the S-sized lane selected by the constant idx
    // from src into the general register dst. pTmp is a scratch governing
    // predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractI src idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(I)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ S, as_PRegister($pTmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // ExtractL rule: reads the D-sized lane selected by the constant idx
    // from src into the long general register dst. pTmp is a scratch
    // governing predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractL src idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(L)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ D, as_PRegister($pTmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // ExtractF rule: reads the S-sized lane selected by the constant idx
    // from src into the float register dst (the FloatRegister overload of
    // sve_extract is used). pTmp is a scratch governing predicate and the
    // flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractF src idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, S, $pTmp, $src, $idx\t# extract from vector(F)" %}
    ins_encode %{
!     __ sve_extract(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // ExtractD rule: reads the D-sized lane selected by the constant idx
    // from src into the double register dst (the FloatRegister overload of
    // sve_extract is used). pTmp is a scratch governing predicate and the
    // flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractD src idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, D, $pTmp, $src, $idx\t# extract from vector(D)" %}
    ins_encode %{
!     __ sve_extract(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------- VectorTest ----------------------------------
  
! instruct vtest_alltrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr)
  %{
    // VectorTest rule for full-width vectors (input length in bytes equals
    // MaxVectorSize) whose test predicate is BoolTest::overflow; the format
    // string labels this case "alltrue". Both inputs are vector registers.
!   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
    match(Set dst (VectorTest src1 src2));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_cmpeq $pTmp, $src1, 0\n\t"
              "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %}
    ins_encode %{
!     // "src2" is not used for sve.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
-     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Compare every src1 lane against immediate 0 under ptrue, then
      // materialize the EQ condition into dst.
-     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
-                ptrue, as_FloatRegister($src1$$reg), 0);
      __ csetw(as_Register($dst$$reg), Assembler::EQ);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vtest_anytrue(iRegINoSp dst, vReg src1, vReg src2, pReg pTmp, rFlagsReg cr)
  %{
    // VectorTest rule for full-width vectors (input length in bytes equals
    // MaxVectorSize) whose test predicate is BoolTest::ne; the format string
    // labels this case "anytrue". Both inputs are vector registers.
!   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
    match(Set dst (VectorTest src1 src2));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_cmpeq $pTmp, $src1, -1\n\t"
              "csetw $dst, NE\t# VectorTest (sve) - anytrue" %}
    ins_encode %{
      // "src2" is not used for sve.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
-     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Compare every src1 lane against immediate -1 under ptrue, then
      // materialize the NE condition into dst.
-     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
-                ptrue, as_FloatRegister($src1$$reg), -1);
      __ csetw(as_Register($dst$$reg), Assembler::NE);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vtest_alltrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorTest rule for vectors shorter than MaxVectorSize whose test
    // predicate is BoolTest::overflow ("alltrue" per the format string).
!   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
    match(Set dst (VectorTest src1 src2));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(SVE_COST);
    format %{ "vtest_alltrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - alltrue" %}
    ins_encode %{
-     // "src2" is not used for sve.
      BasicType bt = Matcher::vector_element_basic_type(this, $src1);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Build pTmp from the vector length of src1 (presumably a predicate
      // covering only the first vector_length lanes), then use it as the
      // governing predicate of the compare against 0.
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size,
                            Matcher::vector_length(this, $src1));
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
!                as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), 0);
      __ csetw(as_Register($dst$$reg), Assembler::EQ);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vtest_anytrue_partial(iRegINoSp dst, vReg src1, vReg src2, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorTest rule for vectors shorter than MaxVectorSize whose test
    // predicate is BoolTest::ne ("anytrue" per the format string).
!   predicate(UseSVE > 0 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
    match(Set dst (VectorTest src1 src2));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(SVE_COST);
    format %{ "vtest_anytrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - anytrue" %}
    ins_encode %{
-     // "src2" is not used for sve.
      BasicType bt = Matcher::vector_element_basic_type(this, $src1);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Build pTmp from the vector length of src1 (presumably a predicate
      // covering only the first vector_length lanes), then use it as the
      // governing predicate of the compare against -1.
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), size,
                            Matcher::vector_length(this, $src1));
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size,
!                as_PRegister($pTmp$$reg), as_FloatRegister($src1$$reg), -1);
      __ csetw(as_Register($dst$$reg), Assembler::NE);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------ Vector insert ---------------------------------
  
! instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorInsert rule for T_BYTE/T_SHORT/T_INT vectors with at most 32
    // lanes: dst becomes src with lane idx replaced by the general register
    // val.
    predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
               n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_INT));
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
    ins_cost(4 * SVE_COST);
!   format %{ "sve_index $dst, -16, 1\t# (B/S/I)\n\t"
!             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // dst is used as scratch for the lane numbers (start -16, step 1);
      // comparing them with idx - 16 leaves pTmp selecting lane idx only.
      __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy src into dst (orr of src with itself), then write val under pTmp.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorInsert rule for T_FLOAT vectors with at most 32 lanes: dst
    // becomes src with lane idx replaced by the float register val.
    predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
    ins_cost(4 * SVE_COST);
    format %{ "sve_index $dst, S, -16, 1\n\t"
!             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
    ins_encode %{
      // dst is used as scratch for the lane numbers (start -16, step 1);
      // comparing them with idx - 16 leaves pTmp selecting lane idx only.
      __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy src into dst (orr of src with itself), then write val under pTmp.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorInsert rule for T_BYTE/T_SHORT/T_INT vectors with more than 32
    // lanes. Unlike the "_small" variant, idx is broadcast into a vector and
    // compared register-to-register, which needs the extra tmp1.
    predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
               n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_INT));
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
    ins_cost(5 * SVE_COST);
!   format %{ "sve_index $tmp1, 0, 1\t# (B/S/I)\n\t"
!             "sve_dup $dst, $idx\t# (B/S/I)\n\t"
!             "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pTmp, $val\t# insert into vector (B/S/I)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // tmp1 holds the lane numbers (start 0, step 1); dst temporarily holds
      // idx in every lane; the compare leaves pTmp selecting lane idx only.
      __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1);
      __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant));
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), size, ptrue,
                 as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
      // Copy src into dst (orr of src with itself), then write val under pTmp.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pTmp$$reg), as_Register($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertL(vReg dst, vReg src, iRegL val, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorInsert rule for T_LONG vectors: dst becomes src with lane idx
    // replaced by the long general register val. There is no lane-count
    // check in the predicate, unlike the B/S/I and F rules.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
    ins_cost(4 * SVE_COST);
    format %{ "sve_index $dst, D, -16, 1\n\t"
!             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pTmp, $val\t# insert into vector (L)" %}
    ins_encode %{
      // dst is used as scratch for the lane numbers (start -16, step 1);
      // comparing them with idx - 16 leaves pTmp selecting lane idx only.
      __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy src into dst (orr of src with itself), then write val under pTmp.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_Register($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertD(vReg dst, vReg src, vRegD val, immI idx, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorInsert rule for T_DOUBLE vectors: dst becomes src with lane idx
    // replaced by the double register val. There is no lane-count check in
    // the predicate, unlike the B/S/I and F rules.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pTmp, KILL cr);
    ins_cost(4 * SVE_COST);
    format %{ "sve_index $dst, D, -16, 1\n\t"
!             "sve_cmpeq $pTmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pTmp, $val\t# insert into vector (D)" %}
    ins_encode %{
      // dst is used as scratch for the lane numbers (start -16, step 1);
      // comparing them with idx - 16 leaves pTmp selecting lane idx only.
      __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ D, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy src into dst (orr of src with itself), then write val under pTmp.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pTmp, rFlagsReg cr)
  %{
    // VectorInsert rule for T_FLOAT vectors with more than 32 lanes. Unlike
    // insertF_small, idx is broadcast into a vector and compared
    // register-to-register, which needs the extra tmp1.
    predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP tmp1, TEMP pTmp, KILL cr);
    ins_cost(5 * SVE_COST);
    format %{ "sve_index $tmp1, S, 0, 1\n\t"
              "sve_dup $dst, S, $idx\n\t"
!             "sve_cmpeq $pTmp, $tmp1, $dst\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pTmp, $val\t# insert into vector (F)" %}
    ins_encode %{
      // tmp1 holds the lane numbers (start 0, step 1); dst temporarily holds
      // idx in every lane; the compare leaves pTmp selecting lane idx only.
      __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1);
      __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant));
!     __ sve_cmp(Assembler::EQ, as_PRegister($pTmp$$reg), __ S, ptrue,
                 as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
      // Copy src into dst (orr of src with itself), then write val under pTmp.
      __ sve_orr(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src$$reg),
                 as_FloatRegister($src$$reg));
      __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
!                as_PRegister($pTmp$$reg), as_FloatRegister($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------ Vector shuffle -------------------------------
  
! instruct loadshuffleB(vReg dst, vReg src)
! %{
    // VectorLoadShuffle rule for T_BYTE results: the shuffle is passed
    // through unchanged, so this is at most a register-to-register copy.
-   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
    match(Set dst (VectorLoadShuffle src));
    ins_cost(SVE_COST);
!   format %{ "sve_orr $dst, $src, $src\t# vector load shuffle (B)" %}
    ins_encode %{
      // Nothing is emitted when dst and src were assigned the same register.
!     if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
!       __ sve_orr(as_FloatRegister($dst$$reg),
!                  as_FloatRegister($src$$reg),
!                  as_FloatRegister($src$$reg));
      }
    %}
    ins_pipe(pipe_slow);
  %}
  
- instruct loadshuffleS(vReg dst, vReg src)
- %{
    // VectorLoadShuffle rule for T_SHORT results: one uunpklo step widens
    // the shuffle from B to H lanes (per the format string).
-   predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
-   match(Set dst (VectorLoadShuffle src));
-   ins_cost(SVE_COST);
-   format %{ "sve_uunpklo $dst, $src\t# vector load shuffle (B to H)" %}
-   ins_encode %{
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct loadshuffleI(vReg dst, vReg src)
- %{
    // VectorLoadShuffle rule for T_INT and T_FLOAT results: two uunpklo
    // steps widen the shuffle from B to H and then to S lanes.
-   predicate(UseSVE > 0 &&
-            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
-             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
-   match(Set dst (VectorLoadShuffle src));
-   ins_cost(2 * SVE_COST);
-   format %{ "sve_uunpklo $dst, H, $src\n\t"
-             "sve_uunpklo $dst, S, $dst\t# vector load shuffle (B to S)" %}
-   ins_encode %{
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct loadshuffleL(vReg dst, vReg src)
- %{
    // VectorLoadShuffle rule for T_LONG and T_DOUBLE results: three uunpklo
    // steps widen the shuffle from B to H, then S, then D lanes.
-   predicate(UseSVE > 0 &&
-            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
-             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
-   match(Set dst (VectorLoadShuffle src));
-   ins_cost(3 * SVE_COST);
-   format %{ "sve_uunpklo $dst, H, $src\n\t"
-             "sve_uunpklo $dst, S, $dst\n\t"
-             "sve_uunpklo $dst, D, $dst\t# vector load shuffle (B to D)" %}
-   ins_encode %{
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
-     __ sve_uunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
  // ------------------------------ Vector rearrange -------------------------------
  
  instruct rearrange(vReg dst, vReg src, vReg shuffle)
  %{
    predicate(UseSVE > 0);
--- 4620,355 ---
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorCastD2X src));
    effect(TEMP_DEF dst, TEMP tmp);
    ins_cost(3 * SVE_COST);
!   format %{ "sve_vectorcast_d2f  $dst, S, $dst\t# convert D to F vector" %}
    ins_encode %{
      __ sve_fcvt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
!     __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
!                          as_FloatRegister($dst$$reg), __ D, as_FloatRegister($tmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
+ 
  // ------------------------------ Vector extract ---------------------------------
  
! instruct extractB(iRegINoSp dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // ExtractB rule: reads the byte lane selected by the constant idx from
    // src into the general register dst. pgtmp is a scratch governing
    // predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractB src idx));
!   effect(TEMP pgtmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, B, $pgtmp, $src, $idx\n\t"
              "sbfmw $dst, $dst, 0U, 7U\t# extract from vector(B)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ B, as_PRegister($pgtmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
      // Signed bitfield move over bits 0..7: sign-extends the extracted byte.
      __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 7U);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // ExtractS rule: reads the halfword (H) lane selected by the constant
    // idx from src into the general register dst. pgtmp is a scratch
    // governing predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractS src idx));
!   effect(TEMP pgtmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, H, $pgtmp, $src, $idx\n\t"
              "sbfmw $dst, $dst, 0U, 15U\t# extract from vector(S)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ H, as_PRegister($pgtmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
      // Signed bitfield move over bits 0..15: sign-extends the extracted short.
      __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
    %}
    ins_pipe(pipe_slow);
  %}
  
  
! instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // ExtractI rule: reads the S-sized lane selected by the constant idx
    // from src into the general register dst. pgtmp is a scratch governing
    // predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractI src idx));
!   effect(TEMP pgtmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, S, $pgtmp, $src, $idx\t# extract from vector(I)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ S, as_PRegister($pgtmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // ExtractL rule: reads the D-sized lane selected by the constant idx
    // from src into the long general register dst. pgtmp is a scratch
    // governing predicate and the flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractL src idx));
!   effect(TEMP pgtmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, D, $pgtmp, $src, $idx\t# extract from vector(L)" %}
    ins_encode %{
!     __ sve_extract(as_Register($dst$$reg), __ D, as_PRegister($pgtmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // ExtractF rule: reads the S-sized lane selected by the constant idx
    // from src into the float register dst (the FloatRegister overload of
    // sve_extract is used). pgtmp is a scratch governing predicate and the
    // flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractF src idx));
!   effect(TEMP pgtmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, S, $pgtmp, $src, $idx\t# extract from vector(F)" %}
    ins_encode %{
!     __ sve_extract(as_FloatRegister($dst$$reg), __ S, as_PRegister($pgtmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // ExtractD rule: reads the D-sized lane selected by the constant idx
    // from src into the double register dst (the FloatRegister overload of
    // sve_extract is used). pgtmp is a scratch governing predicate and the
    // flags register is declared killed.
    predicate(UseSVE > 0);
    match(Set dst (ExtractD src idx));
!   effect(TEMP pgtmp, KILL cr);
    ins_cost(2 * SVE_COST);
!   format %{ "sve_extract $dst, D, $pgtmp, $src, $idx\t# extract from vector(D)" %}
    ins_encode %{
!     __ sve_extract(as_FloatRegister($dst$$reg), __ D, as_PRegister($pgtmp$$reg),
                     as_FloatRegister($src$$reg), (int)($idx$$constant));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------- VectorTest ----------------------------------
  
! instruct vtest_alltrue(iRegINoSp dst, pRegGov src1, pRegGov src2, pReg ptmp, rFlagsReg cr)
  %{
    // VectorTest rule for full-width masks (input length in bytes equals
    // MaxVectorSize) whose test predicate is BoolTest::overflow; the format
    // string labels this case "alltrue". Both inputs are predicate registers.
    // The first format line now ends in "\n\t" like the other multi-line
    // formats, so the second line is indented in the printed listing.
!   predicate(UseSVE > 0 &&
+             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
    match(Set dst (VectorTest src1 src2));
!   effect(TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_eors $ptmp, $src1, $src2\t# $src2 is all true mask\n\t"
              "csetw $dst, EQ\t# VectorTest (sve) - alltrue" %}
    ins_encode %{
      // Flag-setting exclusive-or of src1 with src2 under ptrue; dst is
      // then set from the EQ condition.
!     __ sve_eors(as_PRegister($ptmp$$reg), ptrue,
!                 as_PRegister($src1$$reg), as_PRegister($src2$$reg));
      __ csetw(as_Register($dst$$reg), Assembler::EQ);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vtest_anytrue(iRegINoSp dst, pRegGov src1, pRegGov src2, rFlagsReg cr)
  %{
    // VectorTest rule for full-width masks (input length in bytes equals
    // MaxVectorSize) whose test predicate is BoolTest::ne; the format string
    // labels this case "anytrue". Both inputs are predicate registers and no
    // temporary is needed.
!   predicate(UseSVE > 0 &&
+             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
    match(Set dst (VectorTest src1 src2));
!   effect(KILL cr);
    ins_cost(SVE_COST);
!   format %{ "sve_ptest $src1\n\t"
              "csetw $dst, NE\t# VectorTest (sve) - anytrue" %}
    ins_encode %{
      // "src2" is not used for sve.
      // ptest of src1 under ptrue sets the flags; dst is set from NE.
!     __ sve_ptest(ptrue, as_PRegister($src1$$reg));
      __ csetw(as_Register($dst$$reg), Assembler::NE);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vtest_alltrue_partial(iRegINoSp dst, pRegGov src1, pRegGov src2, pRegGov ptmp, rFlagsReg cr)
  %{
    // VectorTest rule for masks shorter than MaxVectorSize whose test
    // predicate is BoolTest::overflow ("alltrue" per the format string).
!   predicate(UseSVE > 0 &&
+             n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
    match(Set dst (VectorTest src1 src2));
!   effect(TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
    format %{ "vtest_alltrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - alltrue" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src1);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Build ptmp from the vector length of src1 (presumably a predicate
      // covering only the first vector_length lanes), then use it as the
      // governing predicate of the flag-setting exclusive-or of src1 and src2.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size,
                            Matcher::vector_length(this, $src1));
!     __ sve_eors(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
!           as_PRegister($src1$$reg), as_PRegister($src2$$reg));
      __ csetw(as_Register($dst$$reg), Assembler::EQ);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vtest_anytrue_partial(iRegINoSp dst, pRegGov src1, pRegGov src2, pRegGov ptmp, rFlagsReg cr)
  %{
    // VectorTest rule for masks shorter than MaxVectorSize whose test
    // predicate is BoolTest::ne ("anytrue" per the format string).
!   predicate(UseSVE > 0 &&
+             n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize &&
              static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
    match(Set dst (VectorTest src1 src2));
!   effect(TEMP ptmp, KILL cr);
    ins_cost(SVE_COST);
    format %{ "vtest_anytrue_partial $dst, $src1, $src2\t# VectorTest partial (sve) - anytrue" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src1);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Build ptmp from the vector length of src1 (presumably a predicate
      // covering only the first vector_length lanes), then use it as the
      // governing predicate of the flag-setting AND of src1 and src2.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size,
                            Matcher::vector_length(this, $src1));
!     __ sve_ands(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
!           as_PRegister($src1$$reg), as_PRegister($src2$$reg));
      __ csetw(as_Register($dst$$reg), Assembler::NE);
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------ Vector insert ---------------------------------
  
! instruct insertI_small(vReg dst, vReg src, iRegIorL2I val, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // VectorInsert rule for T_BYTE/T_SHORT/T_INT vectors with at most 32
    // lanes: dst becomes src with lane idx replaced by the general register
    // val.
    predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
               n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_INT));
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
    ins_cost(4 * SVE_COST);
!   format %{ "sve_index $dst, -16, 1\t# (B/H/S)\n\t"
!             "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // dst is used as scratch for the lane numbers (start -16, step 1);
      // comparing them with idx - 16 leaves pgtmp selecting lane idx only.
      __ sve_index(as_FloatRegister($dst$$reg), size, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), size, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy src into dst (orr of src with itself), then write val under pgtmp.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pgtmp$$reg), as_Register($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertF_small(vReg dst, vReg src, vRegF val, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // VectorInsert of a float register value into a T_FLOAT vector with at most
    // 32 lanes. Same sequence as the integer "small" variant, fixed to S lanes
    // and taking $val from a floating-point register.
    predicate(UseSVE > 0 && n->as_Vector()->length() <= 32 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
    ins_cost(4 * SVE_COST);
    format %{ "sve_index $dst, S, -16, 1\n\t"
!             "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %}
    ins_encode %{
      // Biased lane numbers in $dst, compared against idx - 16 to build $pgtmp.
      __ sve_index(as_FloatRegister($dst$$reg), __ S, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ S, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy $src into $dst, then overwrite the selected lane(s) with $val.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), __ S, as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertI(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr)
  %{
    // VectorInsert of a general-purpose register value into a byte/short/int
    // vector with more than 32 lanes. Unlike the "small" variant, the target
    // index is broadcast into a vector and compared vector-against-vector, which
    // needs the extra vector temporary $tmp1.
    predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
               n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_INT));
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr);
    ins_cost(5 * SVE_COST);
!   format %{ "sve_index $tmp1, 0, 1\t# (B/H/S)\n\t"
!             "sve_dup $dst, $idx\t# (B/H/S)\n\t"
!             "sve_cmpeq $pgtmp, $tmp1, $dst\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pgtmp, $val\t# insert into vector (B/H/S)" %}
    ins_encode %{
      BasicType bt = Matcher::vector_element_basic_type(this, $src);
      Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // $tmp1 = lane numbers (start 0, step 1); $dst = idx broadcast to all lanes.
      __ sve_index(as_FloatRegister($tmp1$$reg), size, 0, 1);
      __ sve_dup(as_FloatRegister($dst$$reg), size, (int)($idx$$constant));
      // $pgtmp selects the lane(s) where the lane number equals idx.
!     __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), size, ptrue,
                 as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
      // Copy $src into $dst, then overwrite the selected lane(s) with $val.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), size, as_PRegister($pgtmp$$reg), as_Register($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertL(vReg dst, vReg src, iRegL val, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // VectorInsert of a long register value into a T_LONG vector. The predicate
    // has no lane-count check: the biased-immediate compare is used for every
    // vector length (presumably because D-lane vectors never exceed 32 lanes -
    // TODO confirm).
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
    ins_cost(4 * SVE_COST);
    format %{ "sve_index $dst, D, -16, 1\n\t"
!             "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pgtmp, $val\t# insert into vector (L)" %}
    ins_encode %{
      // Biased lane numbers in $dst, compared against idx - 16 to build $pgtmp.
      __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ D, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy $src into $dst, then overwrite the selected lane(s) with $val.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pgtmp$$reg), as_Register($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertD(vReg dst, vReg src, vRegD val, immI idx, pRegGov pgtmp, rFlagsReg cr)
  %{
    // VectorInsert of a double register value into a T_DOUBLE vector. Same
    // sequence as the long variant, with $val taken from a floating-point
    // register.
    predicate(UseSVE > 0 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP pgtmp, KILL cr);
    ins_cost(4 * SVE_COST);
    format %{ "sve_index $dst, D, -16, 1\n\t"
!             "sve_cmpeq $pgtmp, $dst, ($idx-#16) # shift from [0, 31] to [-16, 15]\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pgtmp, $val\t# insert into vector (D)" %}
    ins_encode %{
      // Biased lane numbers in $dst, compared against idx - 16 to build $pgtmp.
      __ sve_index(as_FloatRegister($dst$$reg), __ D, -16, 1);
!     __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ D, ptrue,
                 as_FloatRegister($dst$$reg), (int)($idx$$constant) - 16);
      // Copy $src into $dst, then overwrite the selected lane(s) with $val.
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
!     __ sve_cpy(as_FloatRegister($dst$$reg), __ D, as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct insertF(vReg dst, vReg src, vRegF val, immI idx, vReg tmp1, pRegGov pgtmp, rFlagsReg cr)
  %{
    // VectorInsert of a float register value into a T_FLOAT vector with more
    // than 32 lanes. The index is broadcast and compared vector-against-vector,
    // which needs the extra vector temporary $tmp1.
    predicate(UseSVE > 0 && n->as_Vector()->length() > 32 &&
              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
    match(Set dst (VectorInsert (Binary src val) idx));
!   effect(TEMP_DEF dst, TEMP tmp1, TEMP pgtmp, KILL cr);
    ins_cost(5 * SVE_COST);
    format %{ "sve_index $tmp1, S, 0, 1\n\t"
              "sve_dup $dst, S, $idx\n\t"
!             "sve_cmpeq $pgtmp, $tmp1, $dst\n\t"
              "sve_orr $dst, $src, $src\n\t"
!             "sve_cpy $dst, $pgtmp, $val\t# insert into vector (F)" %}
    ins_encode %{
      // $tmp1 = lane numbers (start 0, step 1); $dst = idx broadcast to all lanes.
      __ sve_index(as_FloatRegister($tmp1$$reg), __ S, 0, 1);
      __ sve_dup(as_FloatRegister($dst$$reg), __ S, (int)($idx$$constant));
      // $pgtmp selects the lane(s) where the lane number equals idx.
!     __ sve_cmp(Assembler::EQ, as_PRegister($pgtmp$$reg), __ S, ptrue,
                 as_FloatRegister($tmp1$$reg), as_FloatRegister($dst$$reg));
      // Copy $src into $dst, then overwrite the selected lane(s) with $val.
      __ sve_orr(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src$$reg),
                 as_FloatRegister($src$$reg));
      __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
!                as_PRegister($pgtmp$$reg), as_FloatRegister($val$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------ Vector shuffle -------------------------------
  
! instruct loadshuffle(vReg dst, vReg src) %{
    // VectorLoadShuffle: produces the shuffle vector $dst from $src for the
    // element type of this node.
!   predicate(UseSVE > 0);
    match(Set dst (VectorLoadShuffle src));
    ins_cost(SVE_COST);
!   format %{ "sve_loadshuffle $dst, $src\t# vector load shuffle (B/H/S/D)" %}
    ins_encode %{
!     BasicType bt = Matcher::vector_element_basic_type(this);
      // Byte elements need no widening: a plain register copy is enough, and it
      // is skipped entirely when $dst and $src are the same register.
!     if (bt == T_BYTE) {
!       if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
!         __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+                    as_FloatRegister($src$$reg));
+       }
+     } else {
        // Other element types: extend the B-sized source lanes to the element
        // size of this node.
+       __ sve_vector_extend(as_FloatRegister($dst$$reg),  __ elemType_to_regVariant(bt),
+                            as_FloatRegister($src$$reg), __ B);
      }
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------ Vector rearrange -------------------------------
  
  instruct rearrange(vReg dst, vReg src, vReg shuffle)
  %{
    predicate(UseSVE > 0);

*** 3611,11 ***
              n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set dst (LoadVectorGather mem idx));
    ins_cost(SVE_COST);
!   format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (I/F)" %}
    ins_encode %{
      __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue,
                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
--- 4991,11 ---
              n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set dst (LoadVectorGather mem idx));
    ins_cost(SVE_COST);
!   format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (S)" %}
    ins_encode %{
      __ sve_ld1w_gather(as_FloatRegister($dst$$reg), ptrue,
                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);

*** 3626,56 ***
              n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set dst (LoadVectorGather mem idx));
    ins_cost(2 * SVE_COST);
!   format %{ "sve_uunpklo $idx, $idx\n\t"
-             "load_vector_gather $dst, $mem, $idx\t# vector load gather (L/D)" %}
    ins_encode %{
      __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
!     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------ Vector Load Gather Partial-------------------------------
  
! instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
    // LoadVectorGather of int/float elements when the node's memory size is
    // smaller than MaxVectorSize. $pTmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set dst (LoadVectorGather mem idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST + INSN_COST);
!   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
-             "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (I/F)" %}
    ins_encode %{
      // Build $pTmp from this node's vector length, then gather S-sized
      // elements from base $mem indexed by $idx under that predicate.
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S,
!                           Matcher::vector_length(this));
-     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg),
                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
    // LoadVectorGather of long/double elements when the node's memory size is
    // smaller than MaxVectorSize. $pTmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set dst (LoadVectorGather mem idx));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(3 * SVE_COST + INSN_COST);
!   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
!             "sve_uunpklo $idx, $idx\n\t"
!             "load_vector_gather $dst, $pTmp, $mem, $idx\t# vector load gather partial (L/D)" %}
    ins_encode %{
      // Build $pTmp from this node's vector length (D lanes).
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D,
                            Matcher::vector_length(this));
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
      __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
!     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pTmp$$reg),
                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
--- 5006,127 ---
              n->as_LoadVectorGather()->memory_size() == MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set dst (LoadVectorGather mem idx));
    ins_cost(2 * SVE_COST);
!   format %{ "load_vector_gather $dst, $mem, $idx\t# vector load gather (D)" %}
    ins_encode %{
      __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
!     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), ptrue, as_Register($mem$$base),
+                        as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  // ------------------------------ Vector Load Gather Partial-------------------------------
  
! instruct gatherI_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
    // LoadVectorGather of int/float elements when the node's memory size is
    // smaller than MaxVectorSize. $ptmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set dst (LoadVectorGather mem idx));
!   effect(TEMP ptmp, KILL cr);
    ins_cost(2 * SVE_COST + INSN_COST);
!   format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (S)" %}
    ins_encode %{
      // Build $ptmp from this node's vector length, then gather S-sized
      // elements from base $mem indexed by $idx under that predicate.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S, Matcher::vector_length(this));
!     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg),
                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct gatherL_partial(vReg dst, indirect mem, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
    // LoadVectorGather of long/double elements when the node's memory size is
    // smaller than MaxVectorSize. $ptmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_LoadVectorGather()->memory_size() < MaxVectorSize &&
              (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set dst (LoadVectorGather mem idx));
!   effect(TEMP ptmp, KILL cr);
    ins_cost(3 * SVE_COST + INSN_COST);
!   format %{ "load_vector_gather $dst, $ptmp, $mem, $idx\t# vector load gather partial (D)" %}
!   ins_encode %{
      // Build $ptmp from this node's vector length (D lanes).
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this));
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
+     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
+     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg),
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // ------------------------------ Vector Load Gather Predicated -------------------------------
+ 
+ instruct gatherI_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{
    // LoadVectorGatherMasked of int/float elements when the node's memory size
    // equals MaxVectorSize. The mask operand $pg is used directly as the
    // governing predicate of the gather, so no temporaries are needed.
+   predicate(UseSVE > 0 &&
+             n->as_LoadVector()->memory_size() == MaxVectorSize &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+   match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
+   ins_cost(SVE_COST);
+   format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (S)" %}
+   ins_encode %{
+     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg),
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct gatherL_masked(vReg dst, indirect mem, vReg idx, pRegGov pg) %{
    // LoadVectorGatherMasked of long/double elements when the node's memory
    // size equals MaxVectorSize. The mask operand $pg governs the gather.
+   predicate(UseSVE > 0 &&
+             n->as_LoadVector()->memory_size() == MaxVectorSize &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+   match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
+   ins_cost(2 * SVE_COST);
+   format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated (D)" %}
+   ins_encode %{
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
+     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
+     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($pg$$reg),
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // ------------------------------ Vector Load Gather Predicated Partial -------------------------------
+ 
+ instruct gatherI_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // LoadVectorGatherMasked of int/float elements when the node's memory size
    // is smaller than MaxVectorSize. The length-derived predicate and the mask
    // $pg are combined in the scratch predicate $ptmp before the gather.
+   predicate(UseSVE > 0 &&
+             n->as_LoadVector()->memory_size() < MaxVectorSize &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+   match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
+   effect(TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (S)" %}
    ins_encode %{
      // $ptmp is first built from this node's vector length ...
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
                            Matcher::vector_length(this));
      // ... then ANDed with the mask $pg; the gather below runs under the result.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_ld1w_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg),
+                        as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct gatherL_masked_partial(vReg dst, indirect mem, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // LoadVectorGatherMasked of long/double elements when the node's memory
    // size is smaller than MaxVectorSize. The length-derived predicate and the
    // mask $pg are combined in the scratch predicate $ptmp before the gather.
+   predicate(UseSVE > 0 &&
+             n->as_LoadVector()->memory_size() < MaxVectorSize &&
+             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+   match(Set dst (LoadVectorGatherMasked mem (Binary idx pg)));
+   effect(TEMP ptmp, KILL cr);
+   ins_cost(4 * SVE_COST);
+   format %{ "load_vector_gather $dst, $pg, $mem, $idx\t# vector load gather predicated partial (D)" %}
+   ins_encode %{
      // $ptmp is first built from this node's vector length, then ANDed with
      // the mask $pg.
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D, Matcher::vector_length(this));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
      __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
!     __ sve_ld1d_gather(as_FloatRegister($dst$$reg), as_PRegister($ptmp$$reg),
                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  

*** 3686,11 ***
              n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
    ins_cost(SVE_COST);
!   format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %}
    ins_encode %{
      __ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue,
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
--- 5137,11 ---
              n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
    ins_cost(SVE_COST);
!   format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (S)" %}
    ins_encode %{
      __ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue,
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);

*** 3701,63 ***
              n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
    ins_cost(2 * SVE_COST);
!   format %{ "sve_uunpklo $idx, $idx\n\t"
-             "store_vector_scatter $mem, $idx, $src\t# vector store scatter (L/D)" %}
    ins_encode %{
!     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D,
-                    as_FloatRegister($idx$$reg));
      __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue,
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // ------------------------------ Vector Store Scatter Partial-------------------------------
  
! instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
    // StoreVectorScatter of int/float elements when the node's memory size is
    // smaller than MaxVectorSize. The element type is read from the stored
    // value's input (n->in(3)->in(1)); $pTmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(2 * SVE_COST + INSN_COST);
!   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
-             "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (I/F)" %}
    ins_encode %{
      // Build $pTmp from the vector length of $src, then scatter S-sized
      // elements of $src to base $mem indexed by $idx under that predicate.
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ S,
                            Matcher::vector_length(this, $src));
!     __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov pTmp, rFlagsReg cr) %{
    // StoreVectorScatter of long/double elements when the node's memory size
    // is smaller than MaxVectorSize. The element type is read from the stored
    // value's input (n->in(3)->in(1)); $pTmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
!   effect(TEMP pTmp, KILL cr);
    ins_cost(3 * SVE_COST + INSN_COST);
!   format %{ "sve_whilelo_zr_imm $pTmp, vector_length\n\t"
-             "sve_uunpklo $idx, $idx\n\t"
-             "store_vector_scatter $mem, $pTmp, $idx, $src\t# vector store scatter partial (L/D)" %}
    ins_encode %{
      // Build $pTmp from the vector length of $src (D lanes).
!     __ sve_whilelo_zr_imm(as_PRegister($pTmp$$reg), __ D,
                            Matcher::vector_length(this, $src));
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
      __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
!     __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pTmp$$reg),
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
  
  // ------------------------------ Vector Load Const -------------------------------
  
  instruct loadconB(vReg dst, immI0 src) %{
    predicate(UseSVE > 0 &&
--- 5152,133 ---
              n->as_StoreVectorScatter()->memory_size() == MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
    ins_cost(2 * SVE_COST);
!   format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (D)" %}
    ins_encode %{
!     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
      __ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue,
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! // ------------------------------ Vector Store Scatter Partial -------------------------------
  
! instruct scatterI_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
    // StoreVectorScatter of int/float elements when the node's memory size is
    // smaller than MaxVectorSize. The element type is read from the stored
    // value's input (n->in(3)->in(1)); $ptmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
!   effect(TEMP ptmp, KILL cr);
    ins_cost(2 * SVE_COST + INSN_COST);
!   format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (S)" %}
    ins_encode %{
      // Build $ptmp from the vector length of $src, then scatter S-sized
      // elements of $src to base $mem indexed by $idx under that predicate.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
                            Matcher::vector_length(this, $src));
!     __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg),
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct scatterL_partial(indirect mem, vReg src, vReg idx, pRegGov ptmp, rFlagsReg cr) %{
    // StoreVectorScatter of long/double elements when the node's memory size
    // is smaller than MaxVectorSize. The element type is read from the stored
    // value's input (n->in(3)->in(1)); $ptmp is a scratch governing predicate.
    predicate(UseSVE > 0 &&
              n->as_StoreVectorScatter()->memory_size() < MaxVectorSize &&
              (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
               n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
    match(Set mem (StoreVectorScatter mem (Binary src idx)));
!   effect(TEMP ptmp, KILL cr);
    ins_cost(3 * SVE_COST + INSN_COST);
!   format %{ "store_vector_scatter $mem, $ptmp, $idx, $src\t# vector store scatter partial (D)" %}
    ins_encode %{
      // Build $ptmp from the vector length of $src (D lanes).
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
                            Matcher::vector_length(this, $src));
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
      __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
!     __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg),
+                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // ------------------------------ Vector Store Scatter Predicated -------------------------------
+ 
+ instruct scatterI_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{
+   // StoreVectorScatterMasked of int/float elements when the node's memory size
+   // equals MaxVectorSize. The element type is read from the stored value's
+   // input (n->in(3)->in(1)); the mask operand $pg is used directly as the
+   // governing predicate of the scatter, so no temporaries are needed.
+   predicate(UseSVE > 0 &&
+             n->as_StoreVector()->memory_size() == MaxVectorSize &&
+             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
+   ins_cost(SVE_COST);
+   format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (S)" %}
+   ins_encode %{
+     __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
+                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct scatterL_masked(indirect mem, vReg src, vReg idx, pRegGov pg) %{
    // StoreVectorScatterMasked of long/double elements when the node's memory
    // size equals MaxVectorSize. The element type is read from the stored
    // value's input (n->in(3)->in(1)); the mask operand $pg governs the scatter.
+   predicate(UseSVE > 0 &&
+             n->as_StoreVector()->memory_size() == MaxVectorSize &&
+             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
+   ins_cost(2 * SVE_COST);
+   format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated (D)" %}
+   ins_encode %{
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
+     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
+     __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
+                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ // ------------------------------ Vector Store Scatter Predicated Partial -------------------------------
+ 
+ instruct scatterI_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // StoreVectorScatterMasked of int/float elements when the node's memory
    // size is smaller than MaxVectorSize. The length-derived predicate and the
    // mask $pg are combined in the scratch predicate $ptmp before the scatter.
+   predicate(UseSVE > 0 &&
+             n->as_StoreVector()->memory_size() < MaxVectorSize &&
+             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
+   effect(TEMP ptmp, KILL cr);
+   ins_cost(3 * SVE_COST);
+   format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (S)" %}
+   ins_encode %{
      // $ptmp is first built from the vector length of $src ...
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ S,
+                           Matcher::vector_length(this, $src));
      // ... then ANDed with the mask $pg; the scatter below runs under the result.
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
+     __ sve_st1w_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg),
                          as_Register($mem$$base), as_FloatRegister($idx$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
+ instruct scatterL_masked_partial(indirect mem, vReg src, vReg idx, pRegGov pg, pRegGov ptmp, rFlagsReg cr) %{
    // StoreVectorScatterMasked of long/double elements when the node's memory
    // size is smaller than MaxVectorSize. The length-derived predicate and the
    // mask $pg are combined in the scratch predicate $ptmp before the scatter.
+   predicate(UseSVE > 0 &&
+             n->as_StoreVector()->memory_size() < MaxVectorSize &&
+             (n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+              n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx pg))));
+   effect(TEMP ptmp, KILL cr);
+   ins_cost(4 * SVE_COST);
+   format %{ "store_vector_scatter $mem, $pg, $idx, $src\t# vector store scatter predicated partial (D)" %}
+   ins_encode %{
      // $ptmp is first built from the vector length of $src, then ANDed with
      // the mask $pg.
+     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ D,
+                           Matcher::vector_length(this, $src));
+     __ sve_and(as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg),
+                as_PRegister($pg$$reg), as_PRegister($pg$$reg));
      // Unpack $idx to D-sized lanes in place; note that $idx is both source
      // and destination of this call.
+     __ sve_uunpklo(as_FloatRegister($idx$$reg), __ D, as_FloatRegister($idx$$reg));
+     __ sve_st1d_scatter(as_FloatRegister($src$$reg), as_PRegister($ptmp$$reg),
+                         as_Register($mem$$base), as_FloatRegister($idx$$reg));
+   %}
+   ins_pipe(pipe_slow);
+ %}
  
  // ------------------------------ Vector Load Const -------------------------------
  
  instruct loadconB(vReg dst, immI0 src) %{
    predicate(UseSVE > 0 &&

*** 3809,205 ***
    %}
    ins_pipe(pipe_class_memory);
  %}
  
  // ---------------------------- Vector mask reductions ---------------------------
! 
- instruct vmask_truecount(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
    // VectorMaskTrueCount for a mask held in a vector register whose length in
    // bytes equals MaxVectorSize. The work is delegated to sve_vmask_reduction,
    // selected by this node's ideal opcode, with ptrue as governing predicate.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (VectorMaskTrueCount src));
!   effect(TEMP ptmp, KILL cr);
-   ins_cost(2 * SVE_COST);
    format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %}
    ins_encode %{
!     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
!                            as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_firsttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
    // VectorMaskFirstTrue for a mask held in a vector register whose length in
    // bytes equals MaxVectorSize. The work is delegated to sve_vmask_reduction,
    // selected by this node's ideal opcode, with ptrue as governing predicate.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (VectorMaskFirstTrue src));
!   effect(TEMP ptmp, KILL cr);
!   ins_cost(3 * SVE_COST);
    format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %}
    ins_encode %{
!     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
!                            as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_lasttrue(iRegINoSp dst, vReg src, pReg ptmp, rFlagsReg cr) %{
    // VectorMaskLastTrue for a mask held in a vector register whose length in
    // bytes equals MaxVectorSize. The work is delegated to sve_vmask_reduction,
    // selected by this node's ideal opcode, with ptrue as governing predicate.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (VectorMaskLastTrue src));
!   effect(TEMP ptmp, KILL cr);
!   ins_cost(4 * SVE_COST);
    format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %}
    ins_encode %{
!     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B,
!                            as_FloatRegister($src$$reg), ptrue, as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_truecount_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk: "partial" variant, selected when UseSVE > 0 and the
    // input vector type is shorter than MaxVectorSize bytes. The mask is in a vReg.
    // Matches (VectorMaskTrueCount src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (VectorMaskTrueCount src));
    // ptmp is declared as a scratch predicate register; the flags register is killed.
!   effect(TEMP ptmp, KILL cr);
!   ins_cost(3 * SVE_COST);
!   format %{ "vmask_truecount $dst, $src\t# vector mask truecount partial (sve)" %}
    ins_encode %{
      // Build a predicate in ptmp from the vector length (byte lanes).
      // NOTE(review): presumably this activates only the first vector_length lanes — confirm
      // against sve_whilelo_zr_imm.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B,
!                           Matcher::vector_length(this, $src));
      // ptmp is passed for both predicate arguments of the reduction helper.
!     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
!                            as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_firsttrue_partial(iRegINoSp dst, vReg src, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk: "partial" variant, selected when UseSVE > 0 and the
    // input vector type is shorter than MaxVectorSize bytes. The mask is in a vReg.
    // Matches (VectorMaskFirstTrue src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (VectorMaskFirstTrue src));
    // Two scratch predicates (pgtmp, ptmp) are reserved; the flags register is killed.
    effect(TEMP pgtmp, TEMP ptmp, KILL cr);
!   ins_cost(4 * SVE_COST);
!   format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %}
    ins_encode %{
      // Build a predicate in pgtmp from the vector length (byte lanes).
      // NOTE(review): presumably this activates only the first vector_length lanes — confirm.
!     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), __ B,
                            Matcher::vector_length(this, $src));
      // Unlike the truecount/lasttrue partial rules, a separate ptmp is passed
      // as the last predicate argument instead of reusing pgtmp.
!     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
!                            as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_lasttrue_partial(iRegINoSp dst, vReg src, pRegGov ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk: "partial" variant, selected when UseSVE > 0 and the
    // input vector type is shorter than MaxVectorSize bytes. The mask is in a vReg.
    // Matches (VectorMaskLastTrue src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (VectorMaskLastTrue src));
    // ptmp is declared as a scratch predicate register; the flags register is killed.
    effect(TEMP ptmp, KILL cr);
    ins_cost(5 * SVE_COST);
!   format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %}
-   ins_encode %{
      // Build a predicate in ptmp from the vector length (byte lanes), then pass
      // it for both predicate arguments of the reduction helper.
      // NOTE(review): presumably the predicate activates only the first vector_length lanes — confirm.
-     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), __ B,
-                           Matcher::vector_length(this, $src));
-     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, __ B, as_FloatRegister($src$$reg),
-                            as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- // ----------------- Vector mask reductions combined with VectorMaskStore ---------------
- 
- instruct vstoremask_truecount(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk (removed): combined rule that matches
    // (VectorMaskTrueCount (VectorStoreMask src esize)) in one instruct.
    // The predicate inspects the VectorStoreMask's own input (n->in(1)->in(1)) and
    // requires its vector type to be exactly MaxVectorSize bytes long.
-   predicate(UseSVE > 0 &&
-             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
-   match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize)));
-   effect(TEMP ptmp, KILL cr);
-   ins_cost(2 * SVE_COST);
-   format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount (sve)" %}
-   ins_encode %{
      // esize is an immediate; it is asserted to be 1, 2, 4 or 8 and converted
      // to a register size variant via elemBytes_to_regVariant.
-     unsigned size = $esize$$constant;
-     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
-     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
      // The vector length of src is passed as an extra trailing argument.
-     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
-                            ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vstoremask_firsttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk (removed): combined rule that matches
    // (VectorMaskFirstTrue (VectorStoreMask src esize)) in one instruct.
    // The predicate inspects the VectorStoreMask's own input (n->in(1)->in(1)) and
    // requires its vector type to be exactly MaxVectorSize bytes long.
-   predicate(UseSVE > 0 &&
-             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
-   match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize)));
-   effect(TEMP ptmp, KILL cr);
-   ins_cost(3 * SVE_COST);
-   format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %}
-   ins_encode %{
      // esize is an immediate; it is asserted to be 1, 2, 4 or 8 and converted
      // to a register size variant via elemBytes_to_regVariant.
-     unsigned size = $esize$$constant;
-     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
-     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
      // The vector length of src is passed as an extra trailing argument.
-     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
-                            ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vstoremask_lasttrue(iRegINoSp dst, vReg src, immI esize, pReg ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk (removed): combined rule that matches
    // (VectorMaskLastTrue (VectorStoreMask src esize)) in one instruct.
    // The predicate inspects the VectorStoreMask's own input (n->in(1)->in(1)) and
    // requires its vector type to be exactly MaxVectorSize bytes long.
-   predicate(UseSVE > 0 &&
-             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
-   match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize)));
-   effect(TEMP ptmp, KILL cr);
-   ins_cost(4 * SVE_COST);
-   format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %}
-   ins_encode %{
      // esize is an immediate; it is asserted to be 1, 2, 4 or 8 and converted
      // to a register size variant via elemBytes_to_regVariant.
-     unsigned size = $esize$$constant;
-     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
-     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
      // The vector length of src is passed as an extra trailing argument.
-     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
-                            ptrue, as_PRegister($ptmp$$reg), Matcher::vector_length(this, $src));
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vstoremask_truecount_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk (removed): "partial" combined rule that matches
    // (VectorMaskTrueCount (VectorStoreMask src esize)) when the VectorStoreMask's
    // input vector type (n->in(1)->in(1)) is shorter than MaxVectorSize bytes.
-   predicate(UseSVE > 0 &&
-             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
-   match(Set dst (VectorMaskTrueCount (VectorStoreMask src esize)));
-   effect(TEMP ptmp, KILL cr);
-   ins_cost(3 * SVE_COST);
-   format %{ "vstoremask_truecount $dst, $src\t# vector mask truecount partial (sve)" %}
-   ins_encode %{
      // esize is an immediate; it is asserted to be 1, 2, 4 or 8 and converted
      // to a register size variant via elemBytes_to_regVariant.
-     unsigned size = $esize$$constant;
-     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
-     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
      // Build a predicate in ptmp from the vector length at the element size variant.
      // NOTE(review): presumably this activates only the first vector_length lanes — confirm.
-     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
-                           Matcher::vector_length(this, $src));
      // ptmp is passed for both predicate arguments; the trailing argument is
      // MaxVectorSize / size rather than the node's vector length.
-     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
-                            as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size);
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vstoremask_firsttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk (removed): "partial" combined rule that matches
    // (VectorMaskFirstTrue (VectorStoreMask src esize)) when the VectorStoreMask's
    // input vector type (n->in(1)->in(1)) is shorter than MaxVectorSize bytes.
-   predicate(UseSVE > 0 &&
-             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
-   match(Set dst (VectorMaskFirstTrue (VectorStoreMask src esize)));
-   effect(TEMP pgtmp, TEMP ptmp, KILL cr);
-   ins_cost(4 * SVE_COST);
-   format %{ "vstoremask_firsttrue $dst, $src\t# vector mask firsttrue partial (sve)" %}
-   ins_encode %{
      // esize is an immediate; it is asserted to be 1, 2, 4 or 8 and converted
      // to a register size variant via elemBytes_to_regVariant.
-     unsigned size = $esize$$constant;
-     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
-     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
      // Build a predicate in pgtmp from the vector length at the element size variant.
      // NOTE(review): presumably this activates only the first vector_length lanes — confirm.
-     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), variant,
-                           Matcher::vector_length(this, $src));
      // pgtmp and a separate ptmp are passed as the predicate arguments; the
      // trailing argument is MaxVectorSize / size.
-     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
-                            as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size);
-   %}
-   ins_pipe(pipe_slow);
- %}
- 
- instruct vstoremask_lasttrue_partial(iRegINoSp dst, vReg src, immI esize, pRegGov ptmp, rFlagsReg cr) %{
    // Old side of this diff hunk (removed): "partial" combined rule that matches
    // (VectorMaskLastTrue (VectorStoreMask src esize)) when the VectorStoreMask's
    // input vector type (n->in(1)->in(1)) is shorter than MaxVectorSize bytes.
-   predicate(UseSVE > 0 &&
-             n->in(1)->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
-   match(Set dst (VectorMaskLastTrue (VectorStoreMask src esize)));
-   effect(TEMP ptmp, KILL cr);
-   ins_cost(5 * SVE_COST);
-   format %{ "vstoremask_lasttrue $dst, $src\t# vector mask lasttrue partial (sve)" %}
    ins_encode %{
      // esize is an immediate; it is asserted to be 1, 2, 4 or 8 and converted
      // to a register size variant via elemBytes_to_regVariant.
!     unsigned size = $esize$$constant;
!     assert(size == 1 || size == 2 || size == 4 || size == 8, "unsupported element size");
!     Assembler::SIMD_RegVariant variant = __ elemBytes_to_regVariant(size);
      // Build a predicate in ptmp from the vector length at the element size variant.
      // NOTE(review): presumably this activates only the first vector_length lanes — confirm.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), variant,
!                           Matcher::vector_length(this, $src));
      // ptmp is passed for both predicate arguments; the trailing argument is
      // MaxVectorSize / size.
-     __ sve_vmask_reduction(this->ideal_Opcode(), $dst$$Register, variant, as_FloatRegister($src$$reg),
-                            as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg), MaxVectorSize / size);
    %}
    ins_pipe(pipe_slow);
! %}
--- 5330,99 ---
    %}
    ins_pipe(pipe_class_memory);
  %}
  
  // ---------------------------- Vector mask reductions ---------------------------
! instruct vmask_truecount(iRegINoSp dst, pReg src) %{
    // New side of this diff hunk: the mask operand is a predicate register (pReg),
    // and no TEMP/KILL effects are declared.
    // Selected only when UseSVE > 0 and the input vector type is exactly
    // MaxVectorSize bytes long. Matches (VectorMaskTrueCount src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (VectorMaskTrueCount src));
!   ins_cost(SVE_COST);
    format %{ "vmask_truecount $dst, $src\t# vector mask truecount (sve)" %}
    ins_encode %{
      // The lane size is derived from the element basic type of src.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // NOTE(review): sve_cntp presumably emits SVE CNTP (count of true lanes of src
      // under the ptrue predicate) — confirm against the assembler.
+     __ sve_cntp($dst$$Register, size, ptrue, as_PRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_firsttrue(iRegINoSp dst, pReg src, pReg ptmp) %{
    // New side of this diff hunk: the mask operand is a predicate register (pReg).
    // Selected only when UseSVE > 0 and the input vector type is exactly
    // MaxVectorSize bytes long. Matches (VectorMaskFirstTrue src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (VectorMaskFirstTrue src));
    // Only a scratch predicate is reserved; no flags KILL is declared here.
!   effect(TEMP ptmp);
!   ins_cost(2 * SVE_COST);
    format %{ "vmask_firsttrue $dst, $src\t# vector mask firsttrue (sve)" %}
    ins_encode %{
      // The lane size is derived from the element basic type of src.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Two steps: sve_brkb writes ptmp from src, then sve_cntp counts ptmp into dst.
      // NOTE(review): presumably BRKB keeps the lanes before the first true lane, so the
      // count equals the index of the first true lane — confirm against the SVE ISA.
+     __ sve_brkb(as_PRegister($ptmp$$reg), ptrue, as_PRegister($src$$reg), false);
+     __ sve_cntp($dst$$Register, size, ptrue, as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_lasttrue(iRegINoSp dst, pReg src, pReg ptmp) %{
    // New side of this diff hunk: the mask operand is a predicate register (pReg).
    // Selected only when UseSVE > 0 and the input vector type is exactly
    // MaxVectorSize bytes long. Matches (VectorMaskLastTrue src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() == MaxVectorSize);
    match(Set dst (VectorMaskLastTrue src));
    // Only a scratch predicate is reserved; no flags KILL is declared here.
!   effect(TEMP ptmp);
!   ins_cost(3 * SVE_COST);
    format %{ "vmask_lasttrue $dst, $src\t# vector mask lasttrue (sve)" %}
    ins_encode %{
      // Delegates to the sve_vmask_lasttrue macro-assembler helper with the element
      // basic type, the source predicate and the scratch predicate.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
!     __ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($src$$reg), as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_truecount_partial(iRegINoSp dst, pReg src, pRegGov pgtmp, rFlagsReg cr) %{
    // New side of this diff hunk: "partial" variant, selected when UseSVE > 0 and the
    // input vector type is shorter than MaxVectorSize bytes. The mask is in a pReg.
    // Matches (VectorMaskTrueCount src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (VectorMaskTrueCount src));
    // pgtmp is a scratch predicate of class pRegGov; the flags register is killed.
!   effect(TEMP pgtmp, KILL cr);
!   ins_cost(2 * SVE_COST);
!   format %{ "vmask_truecount_partial $dst, $src\t# vector mask truecount partial (sve)" %}
    ins_encode %{
      // The lane size is derived from the element basic type of src.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Build a predicate in pgtmp from the vector length, then count src under it.
      // NOTE(review): presumably pgtmp activates only the first vector_length lanes, so
      // lanes beyond the partial vector are not counted — confirm.
!     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), size, Matcher::vector_length(this, $src));
!     __ sve_cntp($dst$$Register, size, as_PRegister($pgtmp$$reg), as_PRegister($src$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_firsttrue_partial(iRegINoSp dst, pReg src, pRegGov pgtmp, pReg ptmp, rFlagsReg cr) %{
    // New side of this diff hunk: "partial" variant, selected when UseSVE > 0 and the
    // input vector type is shorter than MaxVectorSize bytes. The mask is in a pReg.
    // Matches (VectorMaskFirstTrue src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (VectorMaskFirstTrue src));
    // Two scratch predicates (pgtmp, ptmp) are reserved; the flags register is killed.
    effect(TEMP pgtmp, TEMP ptmp, KILL cr);
!   ins_cost(3 * SVE_COST);
!   format %{ "vmask_firsttrue_partial $dst, $src\t# vector mask firsttrue partial (sve)" %}
    ins_encode %{
      // The lane size is derived from the element basic type of src.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
+     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Build a predicate in pgtmp from the vector length.
      // NOTE(review): presumably this activates only the first vector_length lanes — confirm.
+     __ sve_whilelo_zr_imm(as_PRegister($pgtmp$$reg), size,
                            Matcher::vector_length(this, $src));
      // Same brkb + cntp sequence as the full-size rule, but using pgtmp in place of ptrue.
!     __ sve_brkb(as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg), as_PRegister($src$$reg), false);
!     __ sve_cntp($dst$$Register, size, as_PRegister($pgtmp$$reg), as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct vmask_lasttrue_partial(iRegINoSp dst, pReg src, pReg ptmp, rFlagsReg cr) %{
    // New side of this diff hunk: "partial" variant, selected when UseSVE > 0 and the
    // input vector type is shorter than MaxVectorSize bytes. The mask is in a pReg.
    // Matches (VectorMaskLastTrue src) into dst.
    predicate(UseSVE > 0 &&
              n->in(1)->bottom_type()->is_vect()->length_in_bytes() < MaxVectorSize);
    match(Set dst (VectorMaskLastTrue src));
    // ptmp is declared as a scratch predicate register; the flags register is killed.
    effect(TEMP ptmp, KILL cr);
    ins_cost(5 * SVE_COST);
!   format %{ "vmask_lasttrue_partial $dst, $src\t# vector mask lasttrue partial (sve)" %}
    ins_encode %{
      // The lane size is derived from the element basic type of src.
!     BasicType bt = Matcher::vector_element_basic_type(this, $src);
!     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
      // Build a predicate in ptmp from the vector length, AND it with src (in place,
      // under ptrue), then hand ptmp to sve_vmask_lasttrue as both predicate arguments.
      // NOTE(review): presumably the AND clears src lanes beyond the partial vector
      // length before the last-true search — confirm.
!     __ sve_whilelo_zr_imm(as_PRegister($ptmp$$reg), size, Matcher::vector_length(this, $src));
!     __ sve_and(as_PRegister($ptmp$$reg), ptrue, as_PRegister($ptmp$$reg), as_PRegister($src$$reg));
!     __ sve_vmask_lasttrue($dst$$Register, bt, as_PRegister($ptmp$$reg), as_PRegister($ptmp$$reg));
    %}
    ins_pipe(pipe_slow);
! %}
< prev index next >