@@ -147,10 +147,12 @@ case Op_VectorRearrange: return vlen >= 4 && length_in_bytes <= MaxVectorSize; case Op_LoadVector: case Op_StoreVector: return Matcher::vector_size_supported(bt, vlen); + case Op_ExpandV: + if (UseSVE < 2 || is_subword_type(bt)) return false; default: break; } // By default, we only support vector operations with no less than 8 bytes and 2 elements. return 8 <= length_in_bytes && length_in_bytes <= MaxVectorSize && vlen >= 2;
@@ -2195,18 +2197,92 @@
             as_FloatRegister($dst_src$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
-// popcount vector
+// vector popcount - unpredicated
 
 instruct vpopcountI(vReg dst, vReg src) %{
-  predicate(UseSVE > 0);
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector());
   match(Set dst (PopCountVI src));
-  format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+  ins_cost(SVE_COST);
+  format %{ "sve_cnt $dst, $src\t# vector (sve) (B/H/S)" %}
+  ins_encode %{
+    assert(UsePopCountInstruction, "unsupported");
+    BasicType bt = Matcher::vector_element_basic_type(this);  // lane size follows the element type (B/H/S)
+    __ sve_cnt(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vpopcountL(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector() &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (PopCountVL src));
+  ins_cost(SVE_COST);
+  format %{ "sve_cnt $dst, $src\t# vector (sve) (D)" %}
+  ins_encode %{
+    assert(UsePopCountInstruction, "unsupported");
+    __ sve_cnt(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// When PopCountVL is generated by auto-vectorization, the dst element type
+// is T_INT, so the counts computed on D lanes are narrowed to S lanes below.
+// Once the type definition is unified between the Vector API and
+// auto-vectorization, this rule can be merged with the "vpopcountL" rule.
+instruct vpopcountLI(vReg dst, vReg src, vReg vtmp) %{
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector() &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  match(Set dst (PopCountVL src));
+  effect(TEMP_DEF dst, TEMP vtmp);
+  ins_cost(3 * SVE_COST);
+  format %{ "sve_cnt $dst, $src\n\t"
+            "sve_dup $vtmp, #0\n\t"
+            "sve_uzp1 $dst, $dst, $vtmp\t# vector (sve) (S)" %}
   ins_encode %{
-    __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+    assert(UsePopCountInstruction, "unsupported");
+    __ sve_cnt(as_FloatRegister($dst$$reg), __ D,
+         ptrue, as_FloatRegister($src$$reg));  // bit count of every D lane
+    __ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
+         as_FloatRegister($dst$$reg), __ D, as_FloatRegister($vtmp$$reg));  // D -> S lanes, vtmp is scratch
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector popcount - predicated (dst and src share one register, $pg governs the lanes)
+
+instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (PopCountVI dst_src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) (B/H/S)" %}
+  ins_encode %{
+    assert(UsePopCountInstruction, "unsupported");
+    BasicType bt = Matcher::vector_element_basic_type(this);  // lane size follows the element type (B/H/S)
+    __ sve_cnt(as_FloatRegister($dst_src$$reg), __ elemType_to_regVariant(bt),
+         as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0 &&
+            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst_src (PopCountVL dst_src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
+  ins_encode %{
+    assert(UsePopCountInstruction, "unsupported");
+    __ sve_cnt(as_FloatRegister($dst_src$$reg), __ D,
+         as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
 // vector blend
@@ -5648,10 +5724,108 @@
     __ sve_cmp(Assembler::NE, as_PRegister($dst$$reg), size, ptrue, as_FloatRegister($tmp$$reg), 0);
   %}
   ins_pipe(pipe_slow);
 %}
 
+// ---------------------------- Compress/Expand Operations ---------------------------
+
+instruct mcompress(pReg dst, pReg pg, rFlagsReg cr) %{
+  predicate(UseSVE > 0);
+  match(Set dst (CompressM pg));
+  effect(KILL cr);
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_cntp rscratch1, $pg\n\t"
+            "sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_cntp(rscratch1, size, ptrue, as_PRegister($pg$$reg));   // rscratch1 = number of active lanes in pg
+    __ sve_whilelo(as_PRegister($dst$$reg), size, zr, rscratch1);  // dst = lanes [0, rscratch1) active
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
+  predicate(UseSVE > 0 &&
+            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+  match(Set dst (CompressV src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_compact(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg), as_PRegister($pg$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vcompressB(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4,
+                    pReg ptmp, pRegGov pgtmp) %{
+  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp);
+  match(Set dst (CompressV src pg));
+  ins_cost(13 * SVE_COST);
+  format %{ "sve_compact $dst, $src, $pg\t# vector compress (B)" %}
+  ins_encode %{
+    __ sve_compress_byte(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
+                         as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg),
+                         as_FloatRegister($vtmp3$$reg),as_FloatRegister($vtmp4$$reg),
+                         as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vcompressS(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{
+  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp);
+  match(Set dst (CompressV src pg));
+  ins_cost(38 * SVE_COST);
+  format %{ "sve_compact $dst, $src, $pg\t# vector compress (H)" %}
+  ins_encode %{
+    __ sve_compress_short(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
+                          as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), as_PRegister($pgtmp$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
+  match(Set dst (ExpandV src pg));
+  effect(TEMP_DEF dst);
+  ins_cost(4 * SVE_COST);
+  format %{ "sve_dup $dst, S/D, 0\n\t"
+            "sve_histcnt $dst, S/D, $pg, $dst, $dst\n\t"
+            "sve_sub $dst, S/D, 1\n\t"
+            "sve_tbl $dst, S/D, $src, $dst\t# vector expand (S/D)" %}
+  ins_encode %{
+    // Example input:   src = 1 2 3 4 5 6 7 8   (lane 0 is the rightmost element)
+    //                  pg  = 1 0 0 1 1 0 1 1
+    // Expected result: dst = 4 0 0 5 6 0 7 8
+
+    // The basic idea is to use TBL, which can shuffle the elements of a vector
+    // freely. HISTCNT + SUB build TBL's second source operand: for every active
+    // lane, the index of the src element that has to be moved into that lane.
+
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(UseSVE >= 2 && !is_subword_type(bt), "unsupported");  // mirrors the Op_ExpandV support check
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    // dst = 0 0 0 0 0 0 0 0
+    __ sve_dup(as_FloatRegister($dst$$reg), size, 0);
+    // dst = 5 0 0 4 3 0 2 1      (running count of active lanes; inactive lanes stay 0)
+    __ sve_histcnt(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg),
+                   as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+    // dst = 4 -1 -1 3 2 -1 1 0   (zero-based src index; -1 is out of range for TBL)
+    __ sve_sub(as_FloatRegister($dst$$reg), size, 1);
+    // dst = 4 0 0 5 6 0 7 8      (out-of-range indices select 0)
+    __ sve_tbl(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg),
+               as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
   predicate(UseSVE > 0);
   match(Set pg (VectorMaskGen len));
   effect(KILL cr);
   ins_cost(SVE_COST);
@@ -5661,5 +5835,149 @@
     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
     __ sve_whilelo(as_PRegister($pg$$reg), size, zr, as_Register($len$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
+
+// ------------------------------ CountLeadingZerosV ------------------------------
+
+instruct vcountLeadingZeros(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector());
+  match(Set dst (CountLeadingZerosV src));
+  ins_cost(SVE_COST);
+  format %{ "sve_clz $dst, $src\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_clz(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst and src must be the same register so that the lanes inactive in $pg
+// keep their original src elements in the result.
+instruct vcountLeadingZeros_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (CountLeadingZerosV dst_src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_clz $dst_src, $pg, $dst_src\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_clz(as_FloatRegister($dst_src$$reg), size,
+        as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ CountTrailingZerosV -----------------------------
+
+instruct vcountTrailingZeros(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector());
+  match(Set dst (CountTrailingZerosV src));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_rbit $dst, $src\n\t"
+            "sve_clz  $dst, $dst\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));  // bit-reverse each element
+    __ sve_clz(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($dst$$reg));   // clz(rbit(x)) == ctz(x)
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst and src must be the same register so that the lanes inactive in $pg
+// keep their original src elements in the result.
+instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (CountTrailingZerosV dst_src pg));
+  ins_cost(2 * SVE_COST);
+  format %{ "sve_rbit $dst_src, $pg, $dst_src\n\t"
+            "sve_clz  $dst_src, $pg, $dst_src\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_rbit(as_FloatRegister($dst_src$$reg), size,
+        as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));  // bit-reverse each element governed by pg
+    __ sve_clz(as_FloatRegister($dst_src$$reg), size,
+        as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));  // clz(rbit(x)) == ctz(x)
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ---------------------------------- ReverseV ------------------------------------
+
+instruct vreverse(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector());
+  match(Set dst (ReverseV src));
+  ins_cost(SVE_COST);
+  format %{ "sve_rbit $dst, $src\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst and src must be the same register so that the lanes inactive in $pg
+// keep their original src elements in the result.
+instruct vreverse_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (ReverseV dst_src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_rbit $dst_src, $pg, $dst_src\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    __ sve_rbit(as_FloatRegister($dst_src$$reg), size,
+        as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// -------------------------------- ReverseBytesV ---------------------------------
+
+instruct vreverseBytes(vReg dst, vReg src) %{
+  predicate(UseSVE > 0 &&
+            !n->as_Vector()->is_predicated_vector());
+  match(Set dst (ReverseBytesV src));
+  ins_cost(SVE_COST);
+  format %{ "sve_revb $dst, $src\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    if (bt == T_BYTE) {  // a one-byte element has nothing to reverse: only copy src to dst
+      if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+        __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));  // src | src, i.e. a register move
+      }
+    } else {
+      __ sve_revb(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// dst and src must be the same register so that the lanes inactive in $pg
+// keep their original src elements in the result.
+instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src (ReverseBytesV dst_src pg));
+  ins_cost(SVE_COST);
+  format %{ "sve_revb $dst_src, $pg, $dst_src\t# vector (sve)" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
+    if (bt == T_BYTE) {
+      // Nothing to emit: a one-byte element is unchanged and dst_src already holds it.
+    } else {
+      __ sve_revb(as_FloatRegister($dst_src$$reg), size,
+          as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+